// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	TCP over IPv6
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on:
 *	linux/net/ipv4/tcp.c
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 */

#include <linux/bottom_half.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
#include <linux/indirect_call_wrapper.h>

#include <net/aligned_data.h>
#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/inet6_connection_sock.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/snmp.h>
#include <net/dsfield.h>
#include <net/timewait_sock.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>
#include <net/hotdata.h>
#include <net/busy_poll.h>
#include <net/rstreason.h>
#include <net/psp.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <crypto/md5.h>

#include <trace/events/tcp.h>

static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb,
			      enum sk_rst_reason reason);
static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req);

INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);

static const struct inet_connection_sock_af_ops ipv6_mapped;
const struct inet_connection_sock_af_ops ipv6_specific;
#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
#endif

/* Helper returning the inet6 address from a given tcp socket.
 * It can be used in TCP stack instead of inet6_sk(sk).
 * This avoids a dereference and allows compiler optimizations.
 * It is a specialized version of inet6_sk_generic().
 */
#define tcp_inet6_sk(sk) (&container_of_const(tcp_sk(sk), \
					      struct tcp6_sock, tcp)->inet6)

static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	if (dst && dst_hold_safe(dst)) {
		rcu_assign_pointer(sk->sk_rx_dst, dst);
		sk->sk_rx_dst_ifindex = skb->skb_iif;
		sk->sk_rx_dst_cookie = rt6_get_cookie(dst_rt6_info(dst));
	}
}

static u32 tcp_v6_init_seq(const struct sk_buff *skb)
{
	return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
				ipv6_hdr(skb)->saddr.s6_addr32,
				tcp_hdr(skb)->dest,
				tcp_hdr(skb)->source);
}

static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
{
	return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
				   ipv6_hdr(skb)->saddr.s6_addr32);
}

static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr_unsized *uaddr,
			      int addr_len)
{
	/* This check is replicated from tcp_v6_connect() and intended to
	 * prevent BPF program called below from accessing bytes that are out
	 * of the bound specified by user in addr_len.
	 */
	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	sock_owned_by_me(sk);

	return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, &addr_len);
}

static int tcp_v6_connect(struct sock *sk, struct sockaddr_unsized *uaddr,
			  int addr_len)
{
	struct sockaddr_in6 *usin = (struct sockaddr_in6 *)uaddr;
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct in6_addr *saddr = NULL, *final_p, final;
	struct inet_timewait_death_row *tcp_death_row;
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6_txoptions *opt;
	struct dst_entry *dst;
	struct flowi6 fl6;
	int addr_type;
	int err;

	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	if (usin->sin6_family != AF_INET6)
		return -EAFNOSUPPORT;

	memset(&fl6, 0, sizeof(fl6));

	if (inet6_test_bit(SNDFLOW, sk)) {
		fl6.flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
		IP6_ECN_flow_init(fl6.flowlabel);
		if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) {
			struct ip6_flowlabel *flowlabel;

			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
			if (IS_ERR(flowlabel))
				return -EINVAL;
			fl6_sock_release(flowlabel);
		}
	}

	/*
	 * connect() to INADDR_ANY means loopback (BSD'ism).
	 */

	if (ipv6_addr_any(&usin->sin6_addr)) {
		if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
			ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
					       &usin->sin6_addr);
		else
			usin->sin6_addr = in6addr_loopback;
	}

	addr_type = ipv6_addr_type(&usin->sin6_addr);

	if (addr_type & IPV6_ADDR_MULTICAST)
		return -ENETUNREACH;

	if (addr_type & IPV6_ADDR_LINKLOCAL) {
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			/* If interface is set while binding, indices
			 * must coincide.
			 */
			if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
				return -EINVAL;

			sk->sk_bound_dev_if = usin->sin6_scope_id;
		}

		/* Connect to link-local address requires an interface */
		if (!sk->sk_bound_dev_if)
			return -EINVAL;
	}

	if (tp->rx_opt.ts_recent_stamp &&
	    !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		WRITE_ONCE(tp->write_seq, 0);
	}

	sk->sk_v6_daddr = usin->sin6_addr;
	np->flow_label = fl6.flowlabel;

	/*
	 *	TCP over IPv4
	 */

	if (addr_type & IPV6_ADDR_MAPPED) {
		u32 exthdrlen = icsk->icsk_ext_hdr_len;
		struct sockaddr_in sin;

		if (ipv6_only_sock(sk))
			return -ENETUNREACH;

		sin.sin_family = AF_INET;
		sin.sin_port = usin->sin6_port;
		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

		/* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
		WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped);
		if (sk_is_mptcp(sk))
			mptcpv6_handle_mapped(sk, true);
		sk->sk_backlog_rcv = tcp_v4_do_rcv;
#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		err = tcp_v4_connect(sk, (struct sockaddr_unsized *)&sin, sizeof(sin));

		if (err) {
			icsk->icsk_ext_hdr_len = exthdrlen;
			/* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
			WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific);
			if (sk_is_mptcp(sk))
				mptcpv6_handle_mapped(sk, false);
			sk->sk_backlog_rcv = tcp_v6_do_rcv;
#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
			tp->af_specific = &tcp_sock_ipv6_specific;
#endif
			goto failure;
		}
		np->saddr = sk->sk_v6_rcv_saddr;

		return err;
	}

	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
		saddr = &sk->sk_v6_rcv_saddr;

	fl6.flowi6_proto = IPPROTO_TCP;
	fl6.daddr = sk->sk_v6_daddr;
	fl6.saddr = saddr ? *saddr : np->saddr;
	fl6.flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
	fl6.flowi6_oif = sk->sk_bound_dev_if;
	fl6.flowi6_mark = sk->sk_mark;
	fl6.fl6_dport = usin->sin6_port;
	fl6.fl6_sport = inet->inet_sport;
	if (IS_ENABLED(CONFIG_IP_ROUTE_MULTIPATH) && !fl6.fl6_sport)
		fl6.flowi6_flags = FLOWI_FLAG_ANY_SPORT;
	fl6.flowi6_uid = sk_uid(sk);

	opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
	final_p = fl6_update_dst(&fl6, opt, &final);

	security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));

	dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p);
	if (IS_ERR(dst)) {
		err = PTR_ERR(dst);
		goto failure;
	}

	tp->tcp_usec_ts = dst_tcp_usec_ts(dst);
	tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;

	if (!saddr) {
		saddr = &fl6.saddr;

		err = inet_bhash2_update_saddr(sk, saddr, AF_INET6);
		if (err)
			goto failure;
	}

	/* set the source address */
	np->saddr = *saddr;
	inet->inet_rcv_saddr = LOOPBACK4_IPV6;

	sk->sk_gso_type = SKB_GSO_TCPV6;
	ip6_dst_store(sk, dst, false, false);

	icsk->icsk_ext_hdr_len = psp_sk_overhead(sk);
	if (opt)
		icsk->icsk_ext_hdr_len += opt->opt_flen + opt->opt_nflen;

	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

	inet->inet_dport = usin->sin6_port;

	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet6_hash_connect(tcp_death_row, sk);
	if (err)
		goto late_failure;

	sk_set_txhash(sk);

	if (likely(!tp->repair)) {
		if (!tp->write_seq)
			WRITE_ONCE(tp->write_seq,
				   secure_tcpv6_seq(np->saddr.s6_addr32,
						    sk->sk_v6_daddr.s6_addr32,
						    inet->inet_sport,
						    inet->inet_dport));
		tp->tsoffset = secure_tcpv6_ts_off(net, np->saddr.s6_addr32,
						   sk->sk_v6_daddr.s6_addr32);
	}

	if (tcp_fastopen_defer_connect(sk, &err))
		return err;
	if (err)
		goto late_failure;

	err = tcp_connect(sk);
	if (err)
		goto late_failure;

	return 0;

late_failure:
	tcp_set_state(sk, TCP_CLOSE);
	inet_bhash2_reset_saddr(sk);
failure:
	inet->inet_dport = 0;
	sk->sk_route_caps = 0;
	return err;
}

static void tcp_v6_mtu_reduced(struct sock *sk)
{
	struct dst_entry *dst;
	u32 mtu;

	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
		return;

	mtu = READ_ONCE(tcp_sk(sk)->mtu_info);

	/* Drop requests trying to increase our current mss.
	 * Check done in __ip6_rt_update_pmtu() is too late.
	 */
	if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
		return;

	dst = inet6_csk_update_pmtu(sk, mtu);
	if (!dst)
		return;

	if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
		tcp_sync_mss(sk, dst_mtu(dst));
		tcp_simple_retransmit(sk);
	}
}

static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		      u8 type, u8 code, int offset, __be32 info)
{
	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
	const struct tcphdr *th = (struct tcphdr *)(skb->data + offset);
	struct net *net = dev_net_rcu(skb->dev);
	struct request_sock *fastopen;
	struct ipv6_pinfo *np;
	struct tcp_sock *tp;
	__u32 seq, snd_una;
	struct sock *sk;
	bool fatal;
	int err;

	sk = __inet6_lookup_established(net, &hdr->daddr, th->dest,
					&hdr->saddr, ntohs(th->source),
					skb->dev->ifindex, inet6_sdif(skb));

	if (!sk) {
		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
				  ICMP6_MIB_INERRORS);
		return -ENOENT;
	}

	if (sk->sk_state == TCP_TIME_WAIT) {
		/* To increase the counter of ignored icmps for TCP-AO */
		tcp_ao_ignore_icmp(sk, AF_INET6, type, code);
		inet_twsk_put(inet_twsk(sk));
		return 0;
	}
	seq = ntohl(th->seq);
	fatal = icmpv6_err_convert(type, code, &err);
	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		tcp_req_err(sk, seq, fatal);
		return 0;
	}

	if (tcp_ao_ignore_icmp(sk, AF_INET6, type, code)) {
		sock_put(sk);
		return 0;
	}

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (static_branch_unlikely(&ip6_min_hopcount)) {
		/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
		if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
			goto out;
		}
	}

	tp = tcp_sk(sk);
	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
	fastopen = rcu_dereference(tp->fastopen_rsk);
	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, snd_una, tp->snd_nxt)) {
		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = tcp_inet6_sk(sk);

	if (type == NDISC_REDIRECT) {
		if (!sock_owned_by_user(sk)) {
			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);

			if (dst)
				dst->ops->redirect(dst, sk, skb);
		}
		goto out;
	}

	if (type == ICMPV6_PKT_TOOBIG) {
		u32 mtu = ntohl(info);

		/* We are not interested in TCP_LISTEN and open_requests
		 * (SYN-ACKs sent out by Linux are always <576 bytes so
		 * they should go through unfragmented).
		 */
		if (sk->sk_state == TCP_LISTEN)
			goto out;

		if (!ip6_sk_accept_pmtu(sk))
			goto out;

		if (mtu < IPV6_MIN_MTU)
			goto out;

		WRITE_ONCE(tp->mtu_info, mtu);

		if (!sock_owned_by_user(sk))
			tcp_v6_mtu_reduced(sk);
		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
					   &sk->sk_tsq_flags))
			sock_hold(sk);
		goto out;
	}

	/* Might be for a request_sock */
	switch (sk->sk_state) {
	case TCP_SYN_SENT:
	case TCP_SYN_RECV:
		/* Only in fast or simultaneous open. If a fast open socket is
		 * already accepted it is treated as a connected one below.
		 */
		if (fastopen && !fastopen->sk)
			break;

		ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);

		if (!sock_owned_by_user(sk))
			tcp_done_with_error(sk, err);
		else
			WRITE_ONCE(sk->sk_err_soft, err);
		goto out;
	case TCP_LISTEN:
		break;
	default:
		/* check if this ICMP message allows revert of backoff.
		 * (see RFC 6069)
		 */
		if (!fastopen && type == ICMPV6_DEST_UNREACH &&
		    code == ICMPV6_NOROUTE)
			tcp_ld_RTO_revert(sk, seq);
	}

	if (!sock_owned_by_user(sk) && inet6_test_bit(RECVERR6, sk)) {
		WRITE_ONCE(sk->sk_err, err);
		sk_error_report(sk);
	} else {
		WRITE_ONCE(sk->sk_err_soft, err);
	}
out:
	bh_unlock_sock(sk);
	sock_put(sk);
	return 0;
}

static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
			      struct flowi *fl,
			      struct request_sock *req,
			      struct tcp_fastopen_cookie *foc,
			      enum tcp_synack_type synack_type,
			      struct sk_buff *syn_skb)
{
	struct inet_request_sock *ireq = inet_rsk(req);
	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct ipv6_txoptions *opt;
	struct flowi6 *fl6 = &fl->u.ip6;
	struct sk_buff *skb;
	int err = -ENOMEM;
	u8 tclass;

	/* First, grab a route. */
	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
					       IPPROTO_TCP)) == NULL)
		goto done;

	skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);

	if (skb) {
		tcp_rsk(req)->syn_ect_snt = np->tclass & INET_ECN_MASK;
		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
				    &ireq->ir_v6_rmt_addr);

		fl6->daddr = ireq->ir_v6_rmt_addr;
		if (inet6_test_bit(REPFLOW, sk) && ireq->pktopts)
			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));

		tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
				(tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
				(np->tclass & INET_ECN_MASK) :
				np->tclass;

		if (!INET_ECN_is_capable(tclass) &&
		    tcp_bpf_ca_needs_ecn((struct sock *)req))
			tclass |= INET_ECN_ECT_0;

		rcu_read_lock();
		opt = ireq->ipv6_opt;
		if (!opt)
			opt = rcu_dereference(np->opt);
		err = ip6_xmit(sk, skb, fl6, skb->mark ? : READ_ONCE(sk->sk_mark),
			       opt, tclass, READ_ONCE(sk->sk_priority));
		rcu_read_unlock();
		err = net_xmit_eval(err);
	}

done:
	return err;
}

static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
	kfree(inet_rsk(req)->ipv6_opt);
	consume_skb(inet_rsk(req)->pktopts);
}

#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
						   const struct in6_addr *addr,
						   int l3index)
{
	return tcp_md5_do_lookup(sk, l3index,
				 (union tcp_md5_addr *)addr, AF_INET6);
}

static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
						const struct sock *addr_sk)
{
	int l3index;

	l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
						 addr_sk->sk_bound_dev_if);
	return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
				    l3index);
}

static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
				 sockptr_t optval, int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
	union tcp_ao_addr *addr;
	int l3index = 0;
	u8 prefixlen;
	bool l3flag;
	u8 flags;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin6->sin6_family != AF_INET6)
		return -EINVAL;

	flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;
	l3flag = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;

	if (optname == TCP_MD5SIG_EXT &&
	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
		prefixlen = cmd.tcpm_prefixlen;
		if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
					prefixlen > 32))
			return -EINVAL;
	} else {
		prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
	}

	if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex &&
	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
		struct net_device *dev;

		rcu_read_lock();
		dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
		if (dev && netif_is_l3_master(dev))
			l3index = dev->ifindex;
		rcu_read_unlock();

		/* ok to reference set/not set outside of rcu;
		 * right now device MUST be an L3 master
		 */
		if (!dev || !l3index)
			return -EINVAL;
	}

	if (!cmd.tcpm_keylen) {
		if (ipv6_addr_v4mapped(&sin6->sin6_addr))
			return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
					      AF_INET, prefixlen,
					      l3index, flags);
		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
				      AF_INET6, prefixlen, l3index, flags);
	}

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	if (ipv6_addr_v4mapped(&sin6->sin6_addr)) {
		addr = (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3];

		/* Don't allow keys for peers that have a matching TCP-AO key.
		 * See the comment in tcp_ao_add_cmd()
		 */
		if (tcp_ao_required(sk, addr, AF_INET,
				    l3flag ? l3index : -1, false))
			return -EKEYREJECTED;
		return tcp_md5_do_add(sk, addr,
				      AF_INET, prefixlen, l3index, flags,
				      cmd.tcpm_key, cmd.tcpm_keylen);
	}

	addr = (union tcp_md5_addr *)&sin6->sin6_addr;

	/* Don't allow keys for peers that have a matching TCP-AO key.
	 * See the comment in tcp_ao_add_cmd()
	 */
	if (tcp_ao_required(sk, addr, AF_INET6, l3flag ? l3index : -1, false))
		return -EKEYREJECTED;

	return tcp_md5_do_add(sk, addr, AF_INET6, prefixlen, l3index, flags,
			      cmd.tcpm_key, cmd.tcpm_keylen);
}

static void tcp_v6_md5_hash_headers(struct md5_ctx *ctx,
				    const struct in6_addr *daddr,
				    const struct in6_addr *saddr,
				    const struct tcphdr *th, int nbytes)
{
	struct {
		struct tcp6_pseudohdr ip; /* TCP pseudo-header (RFC2460) */
		struct tcphdr tcp;
	} h;

	h.ip.saddr = *saddr;
	h.ip.daddr = *daddr;
	h.ip.protocol = cpu_to_be32(IPPROTO_TCP);
	h.ip.len = cpu_to_be32(nbytes);
	h.tcp = *th;
	h.tcp.check = 0;
	md5_update(ctx, (const u8 *)&h, sizeof(h.ip) + sizeof(h.tcp));
}

static noinline_for_stack void
tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
		    const struct in6_addr *daddr, struct in6_addr *saddr,
		    const struct tcphdr *th)
{
	struct md5_ctx ctx;

	md5_init(&ctx);
	tcp_v6_md5_hash_headers(&ctx, daddr, saddr, th, th->doff << 2);
	tcp_md5_hash_key(&ctx, key);
	md5_final(&ctx, md5_hash);
}

static noinline_for_stack void
tcp_v6_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
		    const struct sock *sk, const struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	const struct in6_addr *saddr, *daddr;
	struct md5_ctx ctx;

	if (sk) { /* valid for establish/request sockets */
		saddr = &sk->sk_v6_rcv_saddr;
		daddr = &sk->sk_v6_daddr;
	} else {
		const struct ipv6hdr *ip6h = ipv6_hdr(skb);

		saddr = &ip6h->saddr;
		daddr = &ip6h->daddr;
	}

	md5_init(&ctx);
	tcp_v6_md5_hash_headers(&ctx, daddr, saddr, th, skb->len);
	tcp_md5_hash_skb_data(&ctx, skb, th->doff << 2);
	tcp_md5_hash_key(&ctx, key);
	md5_final(&ctx, md5_hash);
}
#endif

static void tcp_v6_init_req(struct request_sock *req,
			    const struct sock *sk_listener,
			    struct sk_buff *skb,
			    u32 tw_isn)
{
	bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
	struct inet_request_sock *ireq = inet_rsk(req);
	const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);

	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
	ireq->ir_rmt_addr = LOOPBACK4_IPV6;
	ireq->ir_loc_addr = LOOPBACK4_IPV6;

	/* So that link locals have meaning */
	if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
		ireq->ir_iif = tcp_v6_iif(skb);

	if (!tw_isn &&
	    (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
	     np->rxopt.bits.rxinfo ||
	     np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
	     np->rxopt.bits.rxohlim || inet6_test_bit(REPFLOW, sk_listener))) {
		refcount_inc(&skb->users);
		ireq->pktopts = skb;
	}
}

static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
					  struct sk_buff *skb,
					  struct flowi *fl,
					  struct request_sock *req,
					  u32 tw_isn)
{
	tcp_v6_init_req(req, sk, skb, tw_isn);

	if (security_inet_conn_request(sk, skb, req))
		return NULL;

	return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
}

struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
	.family		=	AF_INET6,
	.obj_size	=	sizeof(struct tcp6_request_sock),
	.send_ack	=	tcp_v6_reqsk_send_ack,
	.destructor	=	tcp_v6_reqsk_destructor,
	.send_reset	=	tcp_v6_send_reset,
};

const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
	.mss_clamp	=	IPV6_MIN_MTU - sizeof(struct tcphdr) -
				sizeof(struct ipv6hdr),
#ifdef CONFIG_TCP_MD5SIG
	.req_md5_lookup	=	tcp_v6_md5_lookup,
	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
#endif
#ifdef CONFIG_TCP_AO
	.ao_lookup	=	tcp_v6_ao_lookup_rsk,
	.ao_calc_key	=	tcp_v6_ao_calc_key_rsk,
	.ao_synack_hash	=	tcp_v6_ao_synack_hash,
#endif
#ifdef CONFIG_SYN_COOKIES
	.cookie_init_seq =	cookie_v6_init_sequence,
#endif
	.route_req	=	tcp_v6_route_req,
	.init_seq	=	tcp_v6_init_seq,
	.init_ts_off	=	tcp_v6_init_ts_off,
	.send_synack	=	tcp_v6_send_synack,
};

static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
				 u32 ack, u32 win, u32 tsval, u32 tsecr,
				 int oif, int rst, u8 tclass, __be32 label,
				 u32 priority, u32 txhash, struct tcp_key *key)
{
	struct net *net = sk ? sock_net(sk) : skb_dst_dev_net_rcu(skb);
	unsigned int tot_len = sizeof(struct tcphdr);
	struct sock *ctl_sk = net->ipv6.tcp_sk;
	const struct tcphdr *th = tcp_hdr(skb);
	__be32 mrst = 0, *topt;
	struct dst_entry *dst;
	struct sk_buff *buff;
	struct tcphdr *t1;
	struct flowi6 fl6;
	u32 mark = 0;

	if (tsecr)
		tot_len += TCPOLEN_TSTAMP_ALIGNED;
	if (tcp_key_is_md5(key))
		tot_len += TCPOLEN_MD5SIG_ALIGNED;
	if (tcp_key_is_ao(key))
		tot_len += tcp_ao_len_aligned(key->ao_key);

#ifdef CONFIG_MPTCP
	if (rst && !tcp_key_is_md5(key)) {
		mrst = mptcp_reset_option(skb);

		if (mrst)
			tot_len += sizeof(__be32);
	}
#endif

	buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
	if (!buff)
		return;

	skb_reserve(buff, MAX_TCP_HEADER);

	t1 = skb_push(buff, tot_len);
	skb_reset_transport_header(buff);

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = tot_len / 4;
	t1->seq = htonl(seq);
	t1->ack_seq = htonl(ack);
	t1->ack = !rst || !th->ack;
	t1->rst = rst;
	t1->window = htons(win);

	topt = (__be32 *)(t1 + 1);

	if (tsecr) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
		*topt++ = htonl(tsval);
		*topt++ = htonl(tsecr);
	}

	if (mrst)
		*topt++ = mrst;

#ifdef CONFIG_TCP_MD5SIG
	if (tcp_key_is_md5(key)) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
		tcp_v6_md5_hash_hdr((__u8 *)topt, key->md5_key,
				    &ipv6_hdr(skb)->saddr,
				    &ipv6_hdr(skb)->daddr, t1);
	}
#endif
#ifdef CONFIG_TCP_AO
	if (tcp_key_is_ao(key)) {
		*topt++ = htonl((TCPOPT_AO << 24) |
				(tcp_ao_len(key->ao_key) << 16) |
				(key->ao_key->sndid << 8) |
				(key->rcv_next));

		tcp_ao_hash_hdr(AF_INET6, (char *)topt, key->ao_key,
				key->traffic_key,
				(union tcp_ao_addr *)&ipv6_hdr(skb)->saddr,
				(union tcp_ao_addr *)&ipv6_hdr(skb)->daddr,
				t1, key->sne);
	}
#endif

	memset(&fl6, 0, sizeof(fl6));
	fl6.daddr = ipv6_hdr(skb)->saddr;
	fl6.saddr = ipv6_hdr(skb)->daddr;
	fl6.flowlabel = label;

	buff->ip_summed = CHECKSUM_PARTIAL;

	__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);

	fl6.flowi6_proto = IPPROTO_TCP;
	if (rt6_need_strict(&fl6.daddr) && !oif)
		fl6.flowi6_oif = tcp_v6_iif(skb);
	else {
		if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
			oif = skb->skb_iif;

		fl6.flowi6_oif = oif;
	}

	if (sk) {
		/* unconstify the socket only to attach it to buff with care. */
		skb_set_owner_edemux(buff, (struct sock *)sk);
		psp_reply_set_decrypted(sk, buff);

		if (sk->sk_state == TCP_TIME_WAIT)
			mark = inet_twsk(sk)->tw_mark;
		else
			mark = READ_ONCE(sk->sk_mark);
		skb_set_delivery_time(buff, tcp_transmit_time(sk), SKB_CLOCK_MONOTONIC);
	}
	if (txhash) {
		/* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */
		skb_set_hash(buff, txhash, PKT_HASH_TYPE_L4);
	}
	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
	fl6.fl6_dport = t1->dest;
	fl6.fl6_sport = t1->source;
	fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));

	/* Pass a socket to ip6_dst_lookup whether or not it is for RST.
	 * The underlying function will use this to retrieve the network
	 * namespace.
	 */
	if (sk && sk->sk_state != TCP_TIME_WAIT)
		dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /*sk's xfrm_policy can be referred*/
	else
		dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL);
	if (!IS_ERR(dst)) {
		skb_dst_set(buff, dst);
		ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
			 tclass, priority);
		TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
		if (rst)
			TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
		return;
	}

	kfree_skb(buff);
}

static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb,
			      enum sk_rst_reason reason)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
	const __u8 *md5_hash_location = NULL;
#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
	bool allocated_traffic_key = false;
#endif
	const struct tcp_ao_hdr *aoh;
	struct tcp_key key = {};
	u32 seq = 0, ack_seq = 0;
	__be32 label = 0;
	u32 priority = 0;
	struct net *net;
	u32 txhash = 0;
	int oif = 0;
#ifdef CONFIG_TCP_MD5SIG
	unsigned char newhash[16];
	struct sock *sk1 = NULL;
#endif

	if (th->rst)
		return;

	/* If sk not NULL, it means we did a successful lookup and incoming
	 * route had to be correct. prequeue might have dropped our dst.
	 */
	if (!sk && !ipv6_unicast_destination(skb))
		return;

	net = sk ? sock_net(sk) : skb_dst_dev_net_rcu(skb);
	/* Invalid TCP option size or twice included auth */
	if (tcp_parse_auth_options(th, &md5_hash_location, &aoh))
		return;
#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
	rcu_read_lock();
#endif
#ifdef CONFIG_TCP_MD5SIG
	if (sk && sk_fullsock(sk)) {
		int l3index;

		/* sdif set, means packet ingressed via a device
		 * in an L3 domain and inet_iif is set to it.
		 */
		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
		key.md5_key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
		if (key.md5_key)
			key.type = TCP_KEY_MD5;
	} else if (md5_hash_location) {
		int dif = tcp_v6_iif_l3_slave(skb);
		int sdif = tcp_v6_sdif(skb);
		int l3index;

		/*
		 * active side is lost. Try to find listening socket through
		 * source port, and then find md5 key through listening socket.
		 * We are not loosening security here:
		 * Incoming packet is checked with md5 hash with finding key,
		 * no RST generated if md5 hash doesn't match.
		 */
		sk1 = inet6_lookup_listener(net, NULL, 0, &ipv6h->saddr, th->source,
					    &ipv6h->daddr, ntohs(th->source),
					    dif, sdif);
		if (!sk1)
			goto out;

		/* sdif set, means packet ingressed via a device
		 * in an L3 domain and dif is set to it.
		 */
		l3index = tcp_v6_sdif(skb) ? dif : 0;
		key.md5_key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
		if (!key.md5_key)
			goto out;
		key.type = TCP_KEY_MD5;

		tcp_v6_md5_hash_skb(newhash, key.md5_key, NULL, skb);
		if (memcmp(md5_hash_location, newhash, 16) != 0)
			goto out;
	}
#endif

	if (th->ack)
		seq = ntohl(th->ack_seq);
	else
		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
			  (th->doff << 2);

#ifdef CONFIG_TCP_AO
	if (aoh) {
		int l3index;

		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
		if (tcp_ao_prepare_reset(sk, skb, aoh, l3index, seq,
					 &key.ao_key, &key.traffic_key,
					 &allocated_traffic_key,
					 &key.rcv_next, &key.sne))
			goto out;
		key.type = TCP_KEY_AO;
	}
#endif

	if (sk) {
		oif = sk->sk_bound_dev_if;
		if (sk_fullsock(sk)) {
			if (inet6_test_bit(REPFLOW, sk))
				label = ip6_flowlabel(ipv6h);
			priority = READ_ONCE(sk->sk_priority);
			txhash = sk->sk_txhash;
		}
		if (sk->sk_state == TCP_TIME_WAIT) {
			label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
			priority = inet_twsk(sk)->tw_priority;
			txhash = inet_twsk(sk)->tw_txhash;
		}
	} else {
		if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
			label = ip6_flowlabel(ipv6h);
	}

	trace_tcp_send_reset(sk, skb, reason);

	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, 1,
			     ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK,
			     label, priority, txhash,
			     &key);

#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
out:
	if (allocated_traffic_key)
		kfree(key.traffic_key);
	rcu_read_unlock();
#endif
}

static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
			    u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
			    struct tcp_key *key, u8 tclass,
			    __be32 label, u32 priority, u32 txhash)
{
	tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, 0,
			     tclass, label, priority, txhash, key);
}

static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb,
				enum tcp_tw_status tw_status)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
	u8 tclass = tw->tw_tclass;
	struct tcp_key key = {};

	if (tw_status == TCP_TW_ACK_OOW)
		tclass &= ~INET_ECN_MASK;
#ifdef CONFIG_TCP_AO
	struct tcp_ao_info *ao_info;

	if (static_branch_unlikely(&tcp_ao_needed.key)) {

		/* FIXME: the segment to-be-acked is not verified yet */
		ao_info = rcu_dereference(tcptw->ao_info);
		if (ao_info) {
			const struct tcp_ao_hdr *aoh;

			/* Invalid TCP option size or twice included auth */
			if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh))
				goto out;
			if (aoh)
				key.ao_key = tcp_ao_established_key(sk, ao_info,
								    aoh->rnext_keyid, -1);
		}
	}
	if (key.ao_key) {
		struct tcp_ao_key *rnext_key;

		key.traffic_key = snd_other_key(key.ao_key);
		/* rcv_next switches to our rcv_next */
		rnext_key = READ_ONCE(ao_info->rnext_key);
		key.rcv_next = rnext_key->rcvid;
		key.sne = READ_ONCE(ao_info->snd_sne);
		key.type = TCP_KEY_AO;
#else
	if (0) {
#endif
#ifdef CONFIG_TCP_MD5SIG
	} else if (static_branch_unlikely(&tcp_md5_needed.key)) {
		key.md5_key = tcp_twsk_md5_key(tcptw);
		if (key.md5_key)
			key.type = TCP_KEY_MD5;
#endif
	}

	tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt,
			READ_ONCE(tcptw->tw_rcv_nxt),
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcp_tw_tsval(tcptw),
			READ_ONCE(tcptw->tw_ts_recent), tw->tw_bound_dev_if,
			&key, tclass, cpu_to_be32(tw->tw_flowlabel),
			tw->tw_priority, tw->tw_txhash);

#ifdef CONFIG_TCP_AO
out:
#endif
	inet_twsk_put(tw);
}

static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
{
	struct tcp_key key = {};

#ifdef CONFIG_TCP_AO
	if (static_branch_unlikely(&tcp_ao_needed.key) &&
	    tcp_rsk_used_ao(req)) {
		const struct in6_addr *addr = &ipv6_hdr(skb)->saddr;
		const struct tcp_ao_hdr *aoh;
		int l3index;

		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
		/* Invalid TCP option size or twice included auth */
		if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh))
			return;
		if (!aoh)
			return;
		key.ao_key = tcp_ao_do_lookup(sk, l3index,
					      (union tcp_ao_addr *)addr,
					      AF_INET6, aoh->rnext_keyid, -1);
		if (unlikely(!key.ao_key)) {
			/* Send ACK with any matching MKT for the peer */
			key.ao_key = tcp_ao_do_lookup(sk, l3index,
						      (union tcp_ao_addr *)addr,
						      AF_INET6, -1, -1);
			/* Matching key disappeared (user removed the key?)
			 * let the handshake timeout.
			 */
			if (!key.ao_key) {
				net_info_ratelimited("TCP-AO key for (%pI6, %d)->(%pI6, %d) suddenly disappeared, won't ACK new connection\n",
						     addr,
						     ntohs(tcp_hdr(skb)->source),
						     &ipv6_hdr(skb)->daddr,
						     ntohs(tcp_hdr(skb)->dest));
				return;
			}
		}
		key.traffic_key = kmalloc(tcp_ao_digest_size(key.ao_key), GFP_ATOMIC);
		if (!key.traffic_key)
			return;

		key.type = TCP_KEY_AO;
		key.rcv_next = aoh->keyid;
		tcp_v6_ao_calc_key_rsk(key.ao_key, key.traffic_key, req);
#else
	if (0) {
#endif
#ifdef CONFIG_TCP_MD5SIG
	} else if (static_branch_unlikely(&tcp_md5_needed.key)) {
		int l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;

		key.md5_key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr,
						   l3index);
		if (key.md5_key)
			key.type = TCP_KEY_MD5;
#endif
	}

	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
	 */
	tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
			tcp_rsk(req)->rcv_nxt,
			tcp_synack_window(req) >> inet_rsk(req)->rcv_wscale,
			tcp_rsk_tsval(tcp_rsk(req)),
			req->ts_recent, sk->sk_bound_dev_if,
			&key, ipv6_get_dsfield(ipv6_hdr(skb)) & ~INET_ECN_MASK,
			0,
			READ_ONCE(sk->sk_priority),
			READ_ONCE(tcp_rsk(req)->txhash));
	if (tcp_key_is_ao(&key))
		kfree(key.traffic_key);
}

static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
	const struct tcphdr *th = tcp_hdr(skb);

	if (!th->syn)
		sk = cookie_v6_check(sk, skb);
#endif
	return sk;
}

u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
			 struct tcphdr *th, u32 *cookie)
{
	u16 mss = 0;
#ifdef CONFIG_SYN_COOKIES
	mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
				    &tcp_request_sock_ipv6_ops, sk, th);
	if (mss) {
		*cookie = __cookie_v6_init_sequence(iph, th, &mss);
		tcp_synq_overflow(sk);
	}
#endif
	return mss;
}

static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))
		goto drop;

	if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
		__IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
		return 0;
	}

	return tcp_conn_request(&tcp6_request_sock_ops,
				&tcp_request_sock_ipv6_ops, sk, skb);

drop:
	tcp_listendrop(sk);
	return 0; /* don't send reset */
}

static void tcp_v6_restore_cb(struct sk_buff *skb)
{
	/* We need to move header back to the beginning if xfrm6_policy_check()
	 * and tcp_v6_fill_cb() are going to be called again.
	 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
	 */
	memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
		sizeof(struct inet6_skb_parm));
}

static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
					 struct request_sock *req,
					 struct dst_entry *dst,
					 struct request_sock *req_unhash,
					 bool *own_req)
{
	struct inet_request_sock *ireq;
	struct ipv6_pinfo *newnp;
	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct ipv6_txoptions *opt;
	struct inet_sock *newinet;
	bool found_dup_sk = false;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
	int l3index;
#endif
	struct flowi6 fl6;

	if (skb->protocol == htons(ETH_P_IP)) {
		/*
		 *	v6 mapped
		 */

		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
					     req_unhash, own_req);

		if (!newsk)
			return NULL;

		newinet = inet_sk(newsk);
		newinet->pinet6 = tcp_inet6_sk(newsk);
		newinet->ipv6_fl_list = NULL;

		newnp = tcp_inet6_sk(newsk);
		newtp = tcp_sk(newsk);

		memcpy(newnp, np, sizeof(struct ipv6_pinfo));

		newnp->saddr = newsk->sk_v6_rcv_saddr;

		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
		if (sk_is_mptcp(newsk))
			mptcpv6_handle_mapped(newsk, true);
		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
		newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		newnp->ipv6_mc_list = NULL;
		newnp->ipv6_ac_list = NULL;
		newnp->pktoptions = NULL;
		newnp->opt = NULL;
		newnp->mcast_oif = inet_iif(skb);
		newnp->mcast_hops = ip_hdr(skb)->ttl;
		newnp->rcv_flowinfo = 0;
		if (inet6_test_bit(REPFLOW, sk))
			newnp->flow_label = 0;

		/*
		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
		 * here, tcp_create_openreq_child now does this for us, see the comment in
		 * that function for the gory details. -acme
		 */

		/* It is a tricky place. Until this moment IPv4 tcp
		   worked with IPv6 icsk.icsk_af_ops.
		   Sync it now.
		 */
		tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);

		return newsk;
	}

	ireq = inet_rsk(req);

	if (sk_acceptq_is_full(sk))
		goto exit_overflow;

	if (!dst) {
		dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
		if (!dst)
			goto exit;
	}

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto exit_nonewsk;

	/*
	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
	 * count here, tcp_create_openreq_child now does this for us, see the
	 * comment in that function for the gory details. -acme
	 */

	newsk->sk_gso_type = SKB_GSO_TCPV6;
	inet6_sk_rx_dst_set(newsk, skb);

	newinet = inet_sk(newsk);
	newinet->pinet6 = tcp_inet6_sk(newsk);
	newinet->ipv6_fl_list = NULL;
	newinet->inet_opt = NULL;

	newtp = tcp_sk(newsk);
	newnp = tcp_inet6_sk(newsk);

	memcpy(newnp, np, sizeof(struct ipv6_pinfo));

	ip6_dst_store(newsk, dst, false, false);

	newnp->saddr = ireq->ir_v6_loc_addr;

	/* Now IPv6 options...

	   First: no IPv4 options.
	 */
	newnp->ipv6_mc_list = NULL;
	newnp->ipv6_ac_list = NULL;

	/* Clone RX bits */
	newnp->rxopt.all = np->rxopt.all;

	newnp->pktoptions = NULL;
	newnp->opt = NULL;
	newnp->mcast_oif = tcp_v6_iif(skb);
	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
	newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
	if (inet6_test_bit(REPFLOW, sk))
		newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));

	/* Set ToS of the new socket based upon the value of incoming SYN.
	 * ECT bits are set later in tcp_init_transfer().
	 */
	if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
		newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;

	/* Clone native IPv6 options from listening socket (if any)

	   Yes, keeping reference count would be much more clever,
	   but we do one more thing here: reattach optmem
	   to newsk.
	 */
	opt = ireq->ipv6_opt;
	if (!opt)
		opt = rcu_dereference(np->opt);
	if (opt) {
		opt = ipv6_dup_options(newsk, opt);
		RCU_INIT_POINTER(newnp->opt, opt);
	}
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (opt)
		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
						    opt->opt_flen;

	tcp_ca_openreq_child(newsk, dst);

	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));

	tcp_initialize_rcv_mss(newsk);

#ifdef CONFIG_TCP_MD5SIG
	l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);

	if (!tcp_rsk_used_ao(req)) {
		/* Copy over the MD5 key from the original socket */
		key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
		if (key) {
			const union tcp_md5_addr *addr;

			addr = (union tcp_md5_addr *)&newsk->sk_v6_daddr;
			if (tcp_md5_key_copy(newsk, addr, AF_INET6, 128, l3index, key))
				goto put_and_exit;
		}
	}
#endif
#ifdef CONFIG_TCP_AO
	/* Copy over tcp_ao_info if any */
	if (tcp_ao_copy_all_matching(sk, newsk, req, skb, AF_INET6))
		goto put_and_exit; /* OOM */
#endif

	if (__inet_inherit_port(sk, newsk) < 0)
		goto put_and_exit;
	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
				       &found_dup_sk);
	if (*own_req) {
		tcp_move_syn(newtp, req);

		/* Clone pktoptions received with SYN, if we own the req */
		if (ireq->pktopts) {
			newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk);
			consume_skb(ireq->pktopts);
			ireq->pktopts = NULL;
			if (newnp->pktoptions)
				tcp_v6_restore_cb(newnp->pktoptions);
		}
	} else {
		if (!req_unhash && found_dup_sk) {
			/* This code path should only be executed in the
			 * syncookie case only
			 */
			bh_unlock_sock(newsk);
			sock_put(newsk);
			newsk = NULL;
		}
	}

	return newsk;

exit_overflow:
	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
exit_nonewsk:
	dst_release(dst);
exit:
	tcp_listendrop(sk);
	return NULL;
put_and_exit:
	inet_csk_prepare_forced_close(newsk);
	tcp_done(newsk);
	goto exit;
}

INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
							   u32));
/* The socket must have its spinlock held when we get
 * here, unless it is a TCP_LISTEN socket.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
INDIRECT_CALLABLE_SCOPE
int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct sk_buff *opt_skb = NULL;
	enum skb_drop_reason reason;
	struct tcp_sock *tp;

	/* Imagine: socket is IPv6. IPv4 packet arrives,
	   goes to IPv4 receive handler and backlogged.
	   From backlog it always goes here. Kerboom...
	   Fortunately, tcp_rcv_established and rcv_established
	   handle them correctly, but it is not the case with
	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
	 */

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_do_rcv(sk, skb);

	reason = psp_sk_rx_policy_check(sk, skb);
	if (reason)
		goto err_discard;

	/*
	 * socket locking is here for SMP purposes as backlog rcv
	 * is currently called with bh processing disabled.
	 */

	/* Do Stevens' IPV6_PKTOPTIONS.

	   Yes, guys, it is the only place in our code, where we
	   may make it not affecting IPv4.
	   The rest of code is protocol independent,
	   and I do not like idea to uglify IPv4.

	   Actually, all the idea behind IPV6_PKTOPTIONS
	   looks not very well thought out. For now we latch
	   options, received in the last packet, enqueued
	   by tcp. Feel free to propose better solution.
					       --ANK (980728)
	 */
	if (np->rxopt.all && sk->sk_state != TCP_LISTEN)
		opt_skb = skb_clone_and_charge_r(skb, sk);

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		struct dst_entry *dst;

		dst = rcu_dereference_protected(sk->sk_rx_dst,
						lockdep_sock_is_held(sk));

		sock_rps_save_rxhash(sk, skb);
		sk_mark_napi_id(sk, skb);
		if (dst) {
			if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
			    INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
					    dst, sk->sk_rx_dst_cookie) == NULL) {
				RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
				dst_release(dst);
			}
		}

		tcp_rcv_established(sk, skb);
		if (opt_skb)
			goto ipv6_pktoptions;
		return 0;
	}

	if (tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v6_cookie_check(sk, skb);

		if (nsk != sk) {
			if (nsk) {
				reason = tcp_child_process(sk, nsk, skb);
				if (reason)
					goto reset;
			}
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb);

	reason = tcp_rcv_state_process(sk, skb);
	if (reason)
		goto reset;
	if (opt_skb)
		goto ipv6_pktoptions;
	return 0;

reset:
	tcp_v6_send_reset(sk, skb, sk_rst_convert_drop_reason(reason));
discard:
	if (opt_skb)
		__kfree_skb(opt_skb);
	sk_skb_reason_drop(sk, skb, reason);
	return 0;
csum_err:
	reason = SKB_DROP_REASON_TCP_CSUM;
	trace_tcp_bad_csum(skb);
	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
err_discard:
	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
	goto discard;

ipv6_pktoptions:
	/* Do you ask, what is it?

	   1. skb was enqueued by tcp.
	   2. skb is added to tail of read queue, rather than out of order.
	   3. socket is not in passive state.
	   4. Finally, it really contains options, which user wants to receive.
	 */
	tp = tcp_sk(sk);
	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
			WRITE_ONCE(np->mcast_oif, tcp_v6_iif(opt_skb));
		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
			WRITE_ONCE(np->mcast_hops,
				   ipv6_hdr(opt_skb)->hop_limit);
		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
		if (inet6_test_bit(REPFLOW, sk))
			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
		if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
			tcp_v6_restore_cb(opt_skb);
			opt_skb = xchg(&np->pktoptions, opt_skb);
		} else {
			__kfree_skb(opt_skb);
			opt_skb = xchg(&np->pktoptions, NULL);
		}
	}

	consume_skb(opt_skb);
	return 0;
}

static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
			   const struct tcphdr *th)
{
	/* This is tricky: we move IP6CB at its correct location into
	 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
	 * _decode_session6() uses IP6CB().
	 * barrier() makes sure compiler won't play aliasing games.
	 */
	memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
		sizeof(struct inet6_skb_parm));
	barrier();

	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff*4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->tcp_flags = tcp_flags_ntohs(th);
	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
	TCP_SKB_CB(skb)->sacked = 0;
	TCP_SKB_CB(skb)->has_rxtstamp =
			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
}

INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
{
	struct net *net = dev_net_rcu(skb->dev);
	enum skb_drop_reason drop_reason;
	enum tcp_tw_status tw_status;
	int sdif = inet6_sdif(skb);
	int dif = inet6_iif(skb);
	const struct tcphdr *th;
	const struct ipv6hdr *hdr;
	struct sock *sk = NULL;
	bool refcounted;
	int ret;
	u32 isn;

	drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/*
	 *	Count it even if it's bad.
	 */
	__TCP_INC_STATS(net, TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = (const struct tcphdr *)skb->data;

	if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) {
		drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
		goto bad_packet;
	}
	if (!pskb_may_pull(skb, th->doff*4))
		goto discard_it;

	if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
		goto csum_error;

	th = (const struct tcphdr *)skb->data;
	hdr = ipv6_hdr(skb);

lookup:
	sk = __inet6_lookup_skb(skb, __tcp_hdrlen(th),
				th->source, th->dest, inet6_iif(skb), sdif,
				&refcounted);
	if (!sk)
		goto no_tcp_socket;

	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		struct request_sock *req = inet_reqsk(sk);
		bool req_stolen = false;
		struct sock *nsk;

		sk = req->rsk_listener;
		if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
			drop_reason = SKB_DROP_REASON_XFRM_POLICY;
		else
			drop_reason = tcp_inbound_hash(sk, req, skb,
						       &hdr->saddr, &hdr->daddr,
						       AF_INET6, dif, sdif);
		if (drop_reason) {
			sk_drops_skbadd(sk, skb);
			reqsk_put(req);
			goto discard_it;
		}
		if (tcp_checksum_complete(skb)) {
			reqsk_put(req);
			goto csum_error;
		}
		if (unlikely(sk->sk_state != TCP_LISTEN)) {
			nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
			if (!nsk) {
				inet_csk_reqsk_queue_drop_and_put(sk, req);
				goto lookup;
			}
			sk = nsk;
			/* reuseport_migrate_sock() has already held one sk_refcnt
			 * before returning.
			 */
		} else {
			sock_hold(sk);
		}
		refcounted = true;
		nsk = NULL;
		if (!tcp_filter(sk, skb, &drop_reason)) {
			th = (const struct tcphdr *)skb->data;
			hdr = ipv6_hdr(skb);
			tcp_v6_fill_cb(skb, hdr, th);
			nsk = tcp_check_req(sk, skb, req, false, &req_stolen,
					    &drop_reason);
		}
		if (!nsk) {
			reqsk_put(req);
			if (req_stolen) {
				/* Another cpu got exclusive access to req
				 * and created a full blown socket.
				 * Try to feed this packet to this socket
				 * instead of discarding it.
				 */
				tcp_v6_restore_cb(skb);
				sock_put(sk);
				goto lookup;
			}
			goto discard_and_relse;
		}
		nf_reset_ct(skb);
		if (nsk == sk) {
			reqsk_put(req);
			tcp_v6_restore_cb(skb);
		} else {
			drop_reason = tcp_child_process(sk, nsk, skb);
			if (drop_reason) {
				enum sk_rst_reason rst_reason;

				rst_reason = sk_rst_convert_drop_reason(drop_reason);
				tcp_v6_send_reset(nsk, skb, rst_reason);
				goto discard_and_relse;
			}
			sock_put(sk);
			return 0;
		}
	}

process:
	if (static_branch_unlikely(&ip6_min_hopcount)) {
		/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
		if (unlikely(hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount))) {
			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
			drop_reason = SKB_DROP_REASON_TCP_MINTTL;
			goto discard_and_relse;
		}
	}

	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
		drop_reason = SKB_DROP_REASON_XFRM_POLICY;
		goto discard_and_relse;
	}

	drop_reason = tcp_inbound_hash(sk, NULL, skb, &hdr->saddr, &hdr->daddr,
				       AF_INET6, dif, sdif);
	if (drop_reason)
		goto discard_and_relse;

	nf_reset_ct(skb);

	if (tcp_filter(sk, skb, &drop_reason))
		goto discard_and_relse;

	th = (const struct tcphdr *)skb->data;
	hdr = ipv6_hdr(skb);
	tcp_v6_fill_cb(skb, hdr, th);

	skb->dev = NULL;

	if (sk->sk_state == TCP_LISTEN) {
		ret = tcp_v6_do_rcv(sk, skb);
		goto put_and_return;
	}

	sk_incoming_cpu_update(sk);

	bh_lock_sock_nested(sk);
	tcp_segs_in(tcp_sk(sk), skb);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
		ret = tcp_v6_do_rcv(sk, skb);
	} else {
		if (tcp_add_backlog(sk, skb, &drop_reason))
			goto discard_and_relse;
	}
	bh_unlock_sock(sk);
put_and_return:
	if (refcounted)
		sock_put(sk);
	return ret ? -1 : 0;

no_tcp_socket:
	drop_reason = SKB_DROP_REASON_NO_SOCKET;
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	tcp_v6_fill_cb(skb, hdr, th);

	if (tcp_checksum_complete(skb)) {
csum_error:
		drop_reason = SKB_DROP_REASON_TCP_CSUM;
		trace_tcp_bad_csum(skb);
		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
bad_packet:
		__TCP_INC_STATS(net, TCP_MIB_INERRS);
	} else {
		tcp_v6_send_reset(NULL, skb, sk_rst_convert_drop_reason(drop_reason));
	}

discard_it:
	SKB_DR_OR(drop_reason, NOT_SPECIFIED);
	sk_skb_reason_drop(sk, skb, drop_reason);
	return 0;

discard_and_relse:
	sk_drops_skbadd(sk, skb);
	if (refcounted)
		sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		drop_reason = SKB_DROP_REASON_XFRM_POLICY;
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	tcp_v6_fill_cb(skb, hdr, th);

	if (tcp_checksum_complete(skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto csum_error;
	}

	tw_status = tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn,
					       &drop_reason);
	switch (tw_status) {
	case TCP_TW_SYN:
	{
		struct sock *sk2;

		sk2 = inet6_lookup_listener(net, skb, __tcp_hdrlen(th),
					    &ipv6_hdr(skb)->saddr, th->source,
					    &ipv6_hdr(skb)->daddr,
					    ntohs(th->dest),
					    tcp_v6_iif_l3_slave(skb),
					    sdif);
		if (sk2) {
			struct inet_timewait_sock *tw = inet_twsk(sk);

			inet_twsk_deschedule_put(tw);
			sk = sk2;
			tcp_v6_restore_cb(skb);
			refcounted = false;
			__this_cpu_write(tcp_tw_isn, isn);
			goto process;
		}

		drop_reason = psp_twsk_rx_policy_check(inet_twsk(sk), skb);
		if (drop_reason)
			break;
	}
		/* to ACK */
		fallthrough;
	case TCP_TW_ACK:
	case TCP_TW_ACK_OOW:
		tcp_v6_timewait_ack(sk, skb, tw_status);
		break;
	case TCP_TW_RST:
		tcp_v6_send_reset(sk, skb, SK_RST_REASON_TCP_TIMEWAIT_SOCKET);
		inet_twsk_deschedule_put(inet_twsk(sk));
		goto discard_it;
	case TCP_TW_SUCCESS:
		;
	}
	goto discard_it;
}

void tcp_v6_early_demux(struct sk_buff *skb)
{
	struct net *net = dev_net_rcu(skb->dev);
	const struct ipv6hdr *hdr;
	const struct tcphdr *th;
	struct sock *sk;

	if (skb->pkt_type != PACKET_HOST)
		return;

	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
		return;

	hdr = ipv6_hdr(skb);
	th = tcp_hdr(skb);

	if (th->doff < sizeof(struct tcphdr) / 4)
		return;

	/* Note : We use inet6_iif() here, not tcp_v6_iif() */
	sk = __inet6_lookup_established(net, &hdr->saddr, th->source,
					&hdr->daddr, ntohs(th->dest),
					inet6_iif(skb), inet6_sdif(skb));
	if (sk) {
		skb->sk = sk;
		skb->destructor = sock_edemux;
		if (sk_fullsock(sk)) {
			struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);

			if (dst)
				dst = dst_check(dst, sk->sk_rx_dst_cookie);
			if (dst &&
			    sk->sk_rx_dst_ifindex == skb->skb_iif)
				skb_dst_set_noref(skb, dst);
		}
	}
}

static struct timewait_sock_ops tcp6_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
};

INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
{
	__tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr);
}
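
/* Default af_ops for native IPv6 TCP sockets, installed by tcp_v6_init_sock().
 * Sockets that end up talking to a v4-mapped peer are switched to
 * "ipv6_mapped" (below) by tcp_v6_connect() and tcp_v6_syn_recv_sock().
 */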
const struct inet_connection_sock_af_ops ipv6_specific = {
	.queue_xmit	= inet6_csk_xmit,
	.send_check	= tcp_v6_send_check,
	.rebuild_header	= inet6_sk_rebuild_header,
	.sk_rx_dst_set	= inet6_sk_rx_dst_set,
	.conn_request	= tcp_v6_conn_request,
	.syn_recv_sock	= tcp_v6_syn_recv_sock,
	.net_header_len	= sizeof(struct ipv6hdr),
	.setsockopt	= ipv6_setsockopt,
	.getsockopt	= ipv6_getsockopt,
	.mtu_reduced	= tcp_v6_mtu_reduced,
};

#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
#ifdef CONFIG_TCP_MD5SIG
	.md5_lookup	= tcp_v6_md5_lookup,
	.calc_md5_hash	= tcp_v6_md5_hash_skb,
	.md5_parse	= tcp_v6_parse_md5_keys,
#endif
#ifdef CONFIG_TCP_AO
	.ao_lookup	= tcp_v6_ao_lookup,
	.calc_ao_hash	= tcp_v6_ao_hash_skb,
	.ao_parse	= tcp_v6_parse_ao,
	.ao_calc_key_sk	= tcp_v6_ao_calc_key_sk,
#endif
};
#endif

/*
 *	TCP over IPv4 via INET6 API
 */
static const struct inet_connection_sock_af_ops ipv6_mapped = {
	.queue_xmit	= ip_queue_xmit,
	.send_check	= tcp_v4_send_check,
	.rebuild_header	= inet_sk_rebuild_header,
	.sk_rx_dst_set	= inet_sk_rx_dst_set,
	.conn_request	= tcp_v6_conn_request,
	.syn_recv_sock	= tcp_v6_syn_recv_sock,
	.net_header_len	= sizeof(struct iphdr),
	.setsockopt	= ipv6_setsockopt,
	.getsockopt	= ipv6_getsockopt,
	.mtu_reduced	= tcp_v4_mtu_reduced,
};

#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
#ifdef CONFIG_TCP_MD5SIG
	.md5_lookup	= tcp_v4_md5_lookup,
	.calc_md5_hash	= tcp_v4_md5_hash_skb,
	.md5_parse	= tcp_v6_parse_md5_keys,
#endif
#ifdef CONFIG_TCP_AO
	.ao_lookup	= tcp_v6_ao_lookup,
	.calc_ao_hash	= tcp_v4_ao_hash_skb,
	.ao_parse	= tcp_v6_parse_ao,
	.ao_calc_key_sk	= tcp_v4_ao_calc_key_sk,
#endif
};

static void tcp6_destruct_sock(struct sock *sk)
{
	tcp_md5_destruct_sock(sk);
	tcp_ao_destroy_sock(sk, false);
	inet6_sock_destruct(sk);
}
#endif

/* NOTE: A lot of things set to zero explicitly by call to
 *       sk_alloc() so need not be done here.
 */
static int tcp_v6_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	tcp_init_sock(sk);

	icsk->icsk_af_ops = &ipv6_specific;

#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
	sk->sk_destruct = tcp6_destruct_sock;
#endif

	return 0;
}

#ifdef CONFIG_PROC_FS
/* Proc filesystem TCPv6 sock list dumping. */
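/* Render one SYN_RECV request socket as a /proc/net/tcp6 row: the state is
 * reported as TCP_SYN_RECV and only the request expiry timer is shown.
 */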
static void get_openreq6(struct seq_file *seq,
			 const struct request_sock *req, int i)
{
	long ttd = req->rsk_timer.expires - jiffies;
	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;

	if (ttd < 0)
		ttd = 0;

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3],
		   inet_rsk(req)->ir_num,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3],
		   ntohs(inet_rsk(req)->ir_rmt_port),
		   TCP_SYN_RECV,
		   0, 0, /* could print option size, but that is af dependent. */
		   1,   /* timers active (only the expire timer) */
		   jiffies_to_clock_t(ttd),
		   req->num_timeout,
		   from_kuid_munged(seq_user_ns(seq),
				    sk_uid(req->rsk_listener)),
		   0,  /* non standard timer */
		   0, /* open_requests have no inode */
		   0, req);
}

static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int timer_active;
	unsigned long timer_expires;
	const struct inet_sock *inet = inet_sk(sp);
	const struct tcp_sock *tp = tcp_sk(sp);
	const struct inet_connection_sock *icsk = inet_csk(sp);
	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
	u8 icsk_pending;
	int rx_queue;
	int state;

	dest = &sp->sk_v6_daddr;
	src = &sp->sk_v6_rcv_saddr;
	destp = ntohs(inet->inet_dport);
	srcp = ntohs(inet->inet_sport);

	icsk_pending = smp_load_acquire(&icsk->icsk_pending);
	if (icsk_pending == ICSK_TIME_RETRANS ||
	    icsk_pending == ICSK_TIME_REO_TIMEOUT ||
	    icsk_pending == ICSK_TIME_LOSS_PROBE) {
		timer_active = 1;
		timer_expires = tcp_timeout_expires(sp);
	} else if (icsk_pending == ICSK_TIME_PROBE0) {
		timer_active = 4;
		timer_expires = tcp_timeout_expires(sp);
	} else if (timer_pending(&icsk->icsk_keepalive_timer)) {
		timer_active = 2;
		timer_expires = icsk->icsk_keepalive_timer.expires;
	} else {
		timer_active = 0;
		timer_expires = jiffies;
	}

	state = inet_sk_state_load(sp);
	if (state == TCP_LISTEN)
		rx_queue = READ_ONCE(sp->sk_ack_backlog);
	else
		/* Because we don't lock the socket,
		 * we might find a transient negative value.
		 */
		rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
				      READ_ONCE(tp->copied_seq), 0);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   state,
		   READ_ONCE(tp->write_seq) - tp->snd_una,
		   rx_queue,
		   timer_active,
		   jiffies_delta_to_clock_t(timer_expires - jiffies),
		   READ_ONCE(icsk->icsk_retransmits),
		   from_kuid_munged(seq_user_ns(seq), sk_uid(sp)),
		   READ_ONCE(icsk->icsk_probes_out),
		   sock_i_ino(sp),
		   refcount_read(&sp->sk_refcnt), sp,
		   jiffies_to_clock_t(icsk->icsk_rto),
		   jiffies_to_clock_t(icsk->icsk_ack.ato),
		   (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
		   tcp_snd_cwnd(tp),
		   state == TCP_LISTEN ?
			fastopenq->max_qlen :
			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
		   );
}

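/* TIME_WAIT sockets carry no tx/rx queue, uid or inode state, so those
 * columns are emitted as literal zeros below.
 */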
static void get_timewait6_sock(struct seq_file *seq,
			       struct inet_timewait_sock *tw, int i)
{
	long delta = tw->tw_timer.expires - jiffies;
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;

	dest = &tw->tw_v6_daddr;
	src = &tw->tw_v6_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp = ntohs(tw->tw_sport);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   READ_ONCE(tw->tw_substate), 0, 0,
		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
		   refcount_read(&tw->tw_refcnt), tw);
}

static int tcp6_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
	struct sock *sk = v;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 " sl "
			 "local_address "
			 "remote_address "
			 "st tx_queue rx_queue tr tm->when retrnsmt"
			 " uid timeout inode\n");
		goto out;
	}
	st = seq->private;

	if (sk->sk_state == TCP_TIME_WAIT)
		get_timewait6_sock(seq, v, st->num);
	else if (sk->sk_state == TCP_NEW_SYN_RECV)
		get_openreq6(seq, v, st->num);
	else
		get_tcp6_sock(seq, v, st->num);
out:
	return 0;
}

static const struct seq_operations tcp6_seq_ops = {
	.show		= tcp6_seq_show,
	.start		= tcp_seq_start,
	.next		= tcp_seq_next,
	.stop		= tcp_seq_stop,
};

static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.family		= AF_INET6,
};

int __net_init tcp6_proc_init(struct net *net)
{
	if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
			sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
		return -ENOMEM;
	return 0;
}

void tcp6_proc_exit(struct net *net)
{
	remove_proc_entry("tcp6", net->proc_net);
}
#endif

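/* Protocol method table registered for SOCK_STREAM/IPPROTO_TCP on AF_INET6
 * via tcpv6_protosw below.  Most handlers are shared with IPv4 TCP; the
 * connect/init paths and backlog receive are IPv6 specific, while .destroy
 * points at tcp_v4_destroy_sock, which covers the address-family
 * independent teardown.
 */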
struct proto tcpv6_prot = {
	.name			= "TCPv6",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.pre_connect		= tcp_v6_pre_connect,
	.connect		= tcp_v6_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v6_init_sock,
	.destroy		= tcp_v4_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.bpf_bypass_getsockopt	= tcp_bpf_bypass_getsockopt,
	.keepalive		= tcp_set_keepalive,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.splice_eof		= tcp_splice_eof,
	.backlog_rcv		= tcp_v6_do_rcv,
	.release_cb		= tcp_release_cb,
	.hash			= inet_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.put_port		= inet_put_port,
#ifdef CONFIG_BPF_SYSCALL
	.psock_update_sk_prot	= tcp_bpf_update_proto,
#endif
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.leave_memory_pressure	= tcp_leave_memory_pressure,
	.stream_memory_free	= tcp_stream_memory_free,
	.sockets_allocated	= &tcp_sockets_allocated,

	.memory_allocated	= &net_aligned_data.tcp_memory_allocated,
	.per_cpu_fw_alloc	= &tcp_memory_per_cpu_fw_alloc,

	.memory_pressure	= &tcp_memory_pressure,
	.sysctl_mem		= sysctl_tcp_mem,

	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp6_sock),
	.ipv6_pinfo_offset	= offsetof(struct tcp6_sock, inet6),
	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
	.twsk_prot		= &tcp6_timewait_sock_ops,
	.rsk_prot		= &tcp6_request_sock_ops,
	.h.hashinfo		= NULL,
	.no_autobind		= true,
	.diag_destroy		= tcp_abort,
};
EXPORT_SYMBOL_GPL(tcpv6_prot);

static struct inet_protosw tcpv6_protosw = {
	.type		= SOCK_STREAM,
	.protocol	= IPPROTO_TCP,
	.prot		= &tcpv6_prot,
	.ops		= &inet6_stream_ops,
	.flags		= INET_PROTOSW_PERMANENT |
			  INET_PROTOSW_ICSK,
};

static int __net_init tcpv6_net_init(struct net *net)
{
	int res;

	res = inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
				   SOCK_RAW, IPPROTO_TCP, net);
	if (!res)
		net->ipv6.tcp_sk->sk_clockid = CLOCK_MONOTONIC;

	return res;
}

static void __net_exit tcpv6_net_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
}

static struct pernet_operations tcpv6_net_ops = {
	.init = tcpv6_net_init,
	.exit = tcpv6_net_exit,
};

int __init tcpv6_init(void)
{
	int ret;

	net_hotdata.tcpv6_protocol = (struct inet6_protocol) {
		.handler     = tcp_v6_rcv,
		.err_handler = tcp_v6_err,
		.flags	     = INET6_PROTO_NOPOLICY | INET6_PROTO_FINAL,
	};
	ret = inet6_add_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP);
	if (ret)
		goto out;

	/* register inet6 protocol */
	ret = inet6_register_protosw(&tcpv6_protosw);
	if (ret)
		goto out_tcpv6_protocol;

	ret = register_pernet_subsys(&tcpv6_net_ops);
	if (ret)
		goto out_tcpv6_protosw;

	ret = mptcpv6_init();
	if (ret)
		goto out_tcpv6_pernet_subsys;

out:
	return ret;

out_tcpv6_pernet_subsys:
	unregister_pernet_subsys(&tcpv6_net_ops);
out_tcpv6_protosw:
	inet6_unregister_protosw(&tcpv6_protosw);
out_tcpv6_protocol:
	inet6_del_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP);
	goto out;
}

void tcpv6_exit(void)
{
	unregister_pernet_subsys(&tcpv6_net_ops);
	inet6_unregister_protosw(&tcpv6_protosw);
	inet6_del_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP);
}