// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	TCP over IPv6
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on:
 *	linux/net/ipv4/tcp.c
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allows both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 */

#include <linux/bottom_half.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
#include <linux/indirect_call_wrapper.h>

#include <net/aligned_data.h>
#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/inet6_connection_sock.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/snmp.h>
#include <net/dsfield.h>
#include <net/timewait_sock.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>
#include <net/hotdata.h>
#include <net/busy_poll.h>
#include <net/rstreason.h>
#include <net/psp.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <crypto/md5.h>

#include <trace/events/tcp.h>

static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb,
			      enum sk_rst_reason reason);
static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req);

INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);

static const struct inet_connection_sock_af_ops ipv6_mapped;
const struct inet_connection_sock_af_ops ipv6_specific;
#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
#endif

/* Helper returning the inet6 address from a given tcp socket.
 * It can be used in the TCP stack instead of inet6_sk(sk).
 * This avoids a dereference and allows compiler optimizations.
 * It is a specialized version of inet6_sk_generic().
92 */ 93 #define tcp_inet6_sk(sk) (&container_of_const(tcp_sk(sk), \ 94 struct tcp6_sock, tcp)->inet6) 95 96 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) 97 { 98 struct dst_entry *dst = skb_dst(skb); 99 100 if (dst && dst_hold_safe(dst)) { 101 rcu_assign_pointer(sk->sk_rx_dst, dst); 102 sk->sk_rx_dst_ifindex = skb->skb_iif; 103 sk->sk_rx_dst_cookie = rt6_get_cookie(dst_rt6_info(dst)); 104 } 105 } 106 107 static u32 tcp_v6_init_seq(const struct sk_buff *skb) 108 { 109 return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32, 110 ipv6_hdr(skb)->saddr.s6_addr32, 111 tcp_hdr(skb)->dest, 112 tcp_hdr(skb)->source); 113 } 114 115 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb) 116 { 117 return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32, 118 ipv6_hdr(skb)->saddr.s6_addr32); 119 } 120 121 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr, 122 int addr_len) 123 { 124 /* This check is replicated from tcp_v6_connect() and intended to 125 * prevent BPF program called below from accessing bytes that are out 126 * of the bound specified by user in addr_len. 127 */ 128 if (addr_len < SIN6_LEN_RFC2133) 129 return -EINVAL; 130 131 sock_owned_by_me(sk); 132 133 return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, &addr_len); 134 } 135 136 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, 137 int addr_len) 138 { 139 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; 140 struct inet_connection_sock *icsk = inet_csk(sk); 141 struct in6_addr *saddr = NULL, *final_p, final; 142 struct inet_timewait_death_row *tcp_death_row; 143 struct ipv6_pinfo *np = tcp_inet6_sk(sk); 144 struct inet_sock *inet = inet_sk(sk); 145 struct tcp_sock *tp = tcp_sk(sk); 146 struct net *net = sock_net(sk); 147 struct ipv6_txoptions *opt; 148 struct dst_entry *dst; 149 struct flowi6 fl6; 150 int addr_type; 151 int err; 152 153 if (addr_len < SIN6_LEN_RFC2133) 154 return -EINVAL; 155 156 if (usin->sin6_family != AF_INET6) 157 return -EAFNOSUPPORT; 158 159 memset(&fl6, 0, sizeof(fl6)); 160 161 if (inet6_test_bit(SNDFLOW, sk)) { 162 fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK; 163 IP6_ECN_flow_init(fl6.flowlabel); 164 if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { 165 struct ip6_flowlabel *flowlabel; 166 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); 167 if (IS_ERR(flowlabel)) 168 return -EINVAL; 169 fl6_sock_release(flowlabel); 170 } 171 } 172 173 /* 174 * connect() to INADDR_ANY means loopback (BSD'ism). 175 */ 176 177 if (ipv6_addr_any(&usin->sin6_addr)) { 178 if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) 179 ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK), 180 &usin->sin6_addr); 181 else 182 usin->sin6_addr = in6addr_loopback; 183 } 184 185 addr_type = ipv6_addr_type(&usin->sin6_addr); 186 187 if (addr_type & IPV6_ADDR_MULTICAST) 188 return -ENETUNREACH; 189 190 if (addr_type&IPV6_ADDR_LINKLOCAL) { 191 if (addr_len >= sizeof(struct sockaddr_in6) && 192 usin->sin6_scope_id) { 193 /* If interface is set while binding, indices 194 * must coincide. 
195 */ 196 if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id)) 197 return -EINVAL; 198 199 sk->sk_bound_dev_if = usin->sin6_scope_id; 200 } 201 202 /* Connect to link-local address requires an interface */ 203 if (!sk->sk_bound_dev_if) 204 return -EINVAL; 205 } 206 207 if (tp->rx_opt.ts_recent_stamp && 208 !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) { 209 tp->rx_opt.ts_recent = 0; 210 tp->rx_opt.ts_recent_stamp = 0; 211 WRITE_ONCE(tp->write_seq, 0); 212 } 213 214 sk->sk_v6_daddr = usin->sin6_addr; 215 np->flow_label = fl6.flowlabel; 216 217 /* 218 * TCP over IPv4 219 */ 220 221 if (addr_type & IPV6_ADDR_MAPPED) { 222 u32 exthdrlen = icsk->icsk_ext_hdr_len; 223 struct sockaddr_in sin; 224 225 if (ipv6_only_sock(sk)) 226 return -ENETUNREACH; 227 228 sin.sin_family = AF_INET; 229 sin.sin_port = usin->sin6_port; 230 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3]; 231 232 /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */ 233 WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped); 234 if (sk_is_mptcp(sk)) 235 mptcpv6_handle_mapped(sk, true); 236 sk->sk_backlog_rcv = tcp_v4_do_rcv; 237 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) 238 tp->af_specific = &tcp_sock_ipv6_mapped_specific; 239 #endif 240 241 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin)); 242 243 if (err) { 244 icsk->icsk_ext_hdr_len = exthdrlen; 245 /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */ 246 WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific); 247 if (sk_is_mptcp(sk)) 248 mptcpv6_handle_mapped(sk, false); 249 sk->sk_backlog_rcv = tcp_v6_do_rcv; 250 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) 251 tp->af_specific = &tcp_sock_ipv6_specific; 252 #endif 253 goto failure; 254 } 255 np->saddr = sk->sk_v6_rcv_saddr; 256 257 return err; 258 } 259 260 if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) 261 saddr = &sk->sk_v6_rcv_saddr; 262 263 fl6.flowi6_proto = IPPROTO_TCP; 264 fl6.daddr = sk->sk_v6_daddr; 265 fl6.saddr = saddr ? 
*saddr : np->saddr; 266 fl6.flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label); 267 fl6.flowi6_oif = sk->sk_bound_dev_if; 268 fl6.flowi6_mark = sk->sk_mark; 269 fl6.fl6_dport = usin->sin6_port; 270 fl6.fl6_sport = inet->inet_sport; 271 if (IS_ENABLED(CONFIG_IP_ROUTE_MULTIPATH) && !fl6.fl6_sport) 272 fl6.flowi6_flags = FLOWI_FLAG_ANY_SPORT; 273 fl6.flowi6_uid = sk_uid(sk); 274 275 opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk)); 276 final_p = fl6_update_dst(&fl6, opt, &final); 277 278 security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6)); 279 280 dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p); 281 if (IS_ERR(dst)) { 282 err = PTR_ERR(dst); 283 goto failure; 284 } 285 286 tp->tcp_usec_ts = dst_tcp_usec_ts(dst); 287 tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row; 288 289 if (!saddr) { 290 saddr = &fl6.saddr; 291 292 err = inet_bhash2_update_saddr(sk, saddr, AF_INET6); 293 if (err) 294 goto failure; 295 } 296 297 /* set the source address */ 298 np->saddr = *saddr; 299 inet->inet_rcv_saddr = LOOPBACK4_IPV6; 300 301 sk->sk_gso_type = SKB_GSO_TCPV6; 302 ip6_dst_store(sk, dst, false, false); 303 304 icsk->icsk_ext_hdr_len = psp_sk_overhead(sk); 305 if (opt) 306 icsk->icsk_ext_hdr_len += opt->opt_flen + 307 opt->opt_nflen; 308 309 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); 310 311 inet->inet_dport = usin->sin6_port; 312 313 tcp_set_state(sk, TCP_SYN_SENT); 314 err = inet6_hash_connect(tcp_death_row, sk); 315 if (err) 316 goto late_failure; 317 318 sk_set_txhash(sk); 319 320 if (likely(!tp->repair)) { 321 if (!tp->write_seq) 322 WRITE_ONCE(tp->write_seq, 323 secure_tcpv6_seq(np->saddr.s6_addr32, 324 sk->sk_v6_daddr.s6_addr32, 325 inet->inet_sport, 326 inet->inet_dport)); 327 tp->tsoffset = secure_tcpv6_ts_off(net, np->saddr.s6_addr32, 328 sk->sk_v6_daddr.s6_addr32); 329 } 330 331 if (tcp_fastopen_defer_connect(sk, &err)) 332 return err; 333 if (err) 334 goto late_failure; 335 336 err = tcp_connect(sk); 337 if (err) 338 goto late_failure; 339 340 return 0; 341 342 late_failure: 343 tcp_set_state(sk, TCP_CLOSE); 344 inet_bhash2_reset_saddr(sk); 345 failure: 346 inet->inet_dport = 0; 347 sk->sk_route_caps = 0; 348 return err; 349 } 350 351 static void tcp_v6_mtu_reduced(struct sock *sk) 352 { 353 struct dst_entry *dst; 354 u32 mtu; 355 356 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) 357 return; 358 359 mtu = READ_ONCE(tcp_sk(sk)->mtu_info); 360 361 /* Drop requests trying to increase our current mss. 362 * Check done in __ip6_rt_update_pmtu() is too late. 
363 */ 364 if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache) 365 return; 366 367 dst = inet6_csk_update_pmtu(sk, mtu); 368 if (!dst) 369 return; 370 371 if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) { 372 tcp_sync_mss(sk, dst_mtu(dst)); 373 tcp_simple_retransmit(sk); 374 } 375 } 376 377 static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, 378 u8 type, u8 code, int offset, __be32 info) 379 { 380 const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data; 381 const struct tcphdr *th = (struct tcphdr *)(skb->data+offset); 382 struct net *net = dev_net_rcu(skb->dev); 383 struct request_sock *fastopen; 384 struct ipv6_pinfo *np; 385 struct tcp_sock *tp; 386 __u32 seq, snd_una; 387 struct sock *sk; 388 bool fatal; 389 int err; 390 391 sk = __inet6_lookup_established(net, &hdr->daddr, th->dest, 392 &hdr->saddr, ntohs(th->source), 393 skb->dev->ifindex, inet6_sdif(skb)); 394 395 if (!sk) { 396 __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), 397 ICMP6_MIB_INERRORS); 398 return -ENOENT; 399 } 400 401 if (sk->sk_state == TCP_TIME_WAIT) { 402 /* To increase the counter of ignored icmps for TCP-AO */ 403 tcp_ao_ignore_icmp(sk, AF_INET6, type, code); 404 inet_twsk_put(inet_twsk(sk)); 405 return 0; 406 } 407 seq = ntohl(th->seq); 408 fatal = icmpv6_err_convert(type, code, &err); 409 if (sk->sk_state == TCP_NEW_SYN_RECV) { 410 tcp_req_err(sk, seq, fatal); 411 return 0; 412 } 413 414 if (tcp_ao_ignore_icmp(sk, AF_INET6, type, code)) { 415 sock_put(sk); 416 return 0; 417 } 418 419 bh_lock_sock(sk); 420 if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG) 421 __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS); 422 423 if (sk->sk_state == TCP_CLOSE) 424 goto out; 425 426 if (static_branch_unlikely(&ip6_min_hopcount)) { 427 /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */ 428 if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) { 429 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); 430 goto out; 431 } 432 } 433 434 tp = tcp_sk(sk); 435 /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */ 436 fastopen = rcu_dereference(tp->fastopen_rsk); 437 snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una; 438 if (sk->sk_state != TCP_LISTEN && 439 !between(seq, snd_una, tp->snd_nxt)) { 440 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS); 441 goto out; 442 } 443 444 np = tcp_inet6_sk(sk); 445 446 if (type == NDISC_REDIRECT) { 447 if (!sock_owned_by_user(sk)) { 448 struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); 449 450 if (dst) 451 dst->ops->redirect(dst, sk, skb); 452 } 453 goto out; 454 } 455 456 if (type == ICMPV6_PKT_TOOBIG) { 457 u32 mtu = ntohl(info); 458 459 /* We are not interested in TCP_LISTEN and open_requests 460 * (SYN-ACKs send out by Linux are always <576bytes so 461 * they should go through unfragmented). 462 */ 463 if (sk->sk_state == TCP_LISTEN) 464 goto out; 465 466 if (!ip6_sk_accept_pmtu(sk)) 467 goto out; 468 469 if (mtu < IPV6_MIN_MTU) 470 goto out; 471 472 WRITE_ONCE(tp->mtu_info, mtu); 473 474 if (!sock_owned_by_user(sk)) 475 tcp_v6_mtu_reduced(sk); 476 else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, 477 &sk->sk_tsq_flags)) 478 sock_hold(sk); 479 goto out; 480 } 481 482 483 /* Might be for an request_sock */ 484 switch (sk->sk_state) { 485 case TCP_SYN_SENT: 486 case TCP_SYN_RECV: 487 /* Only in fast or simultaneous open. If a fast open socket is 488 * already accepted it is treated as a connected one below. 
489 */ 490 if (fastopen && !fastopen->sk) 491 break; 492 493 ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th); 494 495 if (!sock_owned_by_user(sk)) 496 tcp_done_with_error(sk, err); 497 else 498 WRITE_ONCE(sk->sk_err_soft, err); 499 goto out; 500 case TCP_LISTEN: 501 break; 502 default: 503 /* check if this ICMP message allows revert of backoff. 504 * (see RFC 6069) 505 */ 506 if (!fastopen && type == ICMPV6_DEST_UNREACH && 507 code == ICMPV6_NOROUTE) 508 tcp_ld_RTO_revert(sk, seq); 509 } 510 511 if (!sock_owned_by_user(sk) && inet6_test_bit(RECVERR6, sk)) { 512 WRITE_ONCE(sk->sk_err, err); 513 sk_error_report(sk); 514 } else { 515 WRITE_ONCE(sk->sk_err_soft, err); 516 } 517 out: 518 bh_unlock_sock(sk); 519 sock_put(sk); 520 return 0; 521 } 522 523 524 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, 525 struct flowi *fl, 526 struct request_sock *req, 527 struct tcp_fastopen_cookie *foc, 528 enum tcp_synack_type synack_type, 529 struct sk_buff *syn_skb) 530 { 531 struct inet_request_sock *ireq = inet_rsk(req); 532 const struct ipv6_pinfo *np = tcp_inet6_sk(sk); 533 struct ipv6_txoptions *opt; 534 struct flowi6 *fl6 = &fl->u.ip6; 535 struct sk_buff *skb; 536 int err = -ENOMEM; 537 u8 tclass; 538 539 /* First, grab a route. */ 540 if (!dst && (dst = inet6_csk_route_req(sk, fl6, req, 541 IPPROTO_TCP)) == NULL) 542 goto done; 543 544 skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb); 545 546 if (skb) { 547 tcp_rsk(req)->syn_ect_snt = np->tclass & INET_ECN_MASK; 548 __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr, 549 &ireq->ir_v6_rmt_addr); 550 551 fl6->daddr = ireq->ir_v6_rmt_addr; 552 if (inet6_test_bit(REPFLOW, sk) && ireq->pktopts) 553 fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts)); 554 555 tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ? 556 (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) | 557 (np->tclass & INET_ECN_MASK) : 558 np->tclass; 559 560 if (!INET_ECN_is_capable(tclass) && 561 tcp_bpf_ca_needs_ecn((struct sock *)req)) 562 tclass |= INET_ECN_ECT_0; 563 564 rcu_read_lock(); 565 opt = ireq->ipv6_opt; 566 if (!opt) 567 opt = rcu_dereference(np->opt); 568 err = ip6_xmit(sk, skb, fl6, skb->mark ? 
: READ_ONCE(sk->sk_mark), 569 opt, tclass, READ_ONCE(sk->sk_priority)); 570 rcu_read_unlock(); 571 err = net_xmit_eval(err); 572 } 573 574 done: 575 return err; 576 } 577 578 579 static void tcp_v6_reqsk_destructor(struct request_sock *req) 580 { 581 kfree(inet_rsk(req)->ipv6_opt); 582 consume_skb(inet_rsk(req)->pktopts); 583 } 584 585 #ifdef CONFIG_TCP_MD5SIG 586 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk, 587 const struct in6_addr *addr, 588 int l3index) 589 { 590 return tcp_md5_do_lookup(sk, l3index, 591 (union tcp_md5_addr *)addr, AF_INET6); 592 } 593 594 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk, 595 const struct sock *addr_sk) 596 { 597 int l3index; 598 599 l3index = l3mdev_master_ifindex_by_index(sock_net(sk), 600 addr_sk->sk_bound_dev_if); 601 return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr, 602 l3index); 603 } 604 605 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname, 606 sockptr_t optval, int optlen) 607 { 608 struct tcp_md5sig cmd; 609 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr; 610 union tcp_ao_addr *addr; 611 int l3index = 0; 612 u8 prefixlen; 613 bool l3flag; 614 u8 flags; 615 616 if (optlen < sizeof(cmd)) 617 return -EINVAL; 618 619 if (copy_from_sockptr(&cmd, optval, sizeof(cmd))) 620 return -EFAULT; 621 622 if (sin6->sin6_family != AF_INET6) 623 return -EINVAL; 624 625 flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX; 626 l3flag = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX; 627 628 if (optname == TCP_MD5SIG_EXT && 629 cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) { 630 prefixlen = cmd.tcpm_prefixlen; 631 if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) && 632 prefixlen > 32)) 633 return -EINVAL; 634 } else { 635 prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128; 636 } 637 638 if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex && 639 cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) { 640 struct net_device *dev; 641 642 rcu_read_lock(); 643 dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex); 644 if (dev && netif_is_l3_master(dev)) 645 l3index = dev->ifindex; 646 rcu_read_unlock(); 647 648 /* ok to reference set/not set outside of rcu; 649 * right now device MUST be an L3 master 650 */ 651 if (!dev || !l3index) 652 return -EINVAL; 653 } 654 655 if (!cmd.tcpm_keylen) { 656 if (ipv6_addr_v4mapped(&sin6->sin6_addr)) 657 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3], 658 AF_INET, prefixlen, 659 l3index, flags); 660 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr, 661 AF_INET6, prefixlen, l3index, flags); 662 } 663 664 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN) 665 return -EINVAL; 666 667 if (ipv6_addr_v4mapped(&sin6->sin6_addr)) { 668 addr = (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3]; 669 670 /* Don't allow keys for peers that have a matching TCP-AO key. 671 * See the comment in tcp_ao_add_cmd() 672 */ 673 if (tcp_ao_required(sk, addr, AF_INET, 674 l3flag ? l3index : -1, false)) 675 return -EKEYREJECTED; 676 return tcp_md5_do_add(sk, addr, 677 AF_INET, prefixlen, l3index, flags, 678 cmd.tcpm_key, cmd.tcpm_keylen); 679 } 680 681 addr = (union tcp_md5_addr *)&sin6->sin6_addr; 682 683 /* Don't allow keys for peers that have a matching TCP-AO key. 684 * See the comment in tcp_ao_add_cmd() 685 */ 686 if (tcp_ao_required(sk, addr, AF_INET6, l3flag ? 
l3index : -1, false)) 687 return -EKEYREJECTED; 688 689 return tcp_md5_do_add(sk, addr, AF_INET6, prefixlen, l3index, flags, 690 cmd.tcpm_key, cmd.tcpm_keylen); 691 } 692 693 static void tcp_v6_md5_hash_headers(struct md5_ctx *ctx, 694 const struct in6_addr *daddr, 695 const struct in6_addr *saddr, 696 const struct tcphdr *th, int nbytes) 697 { 698 struct { 699 struct tcp6_pseudohdr ip; /* TCP pseudo-header (RFC2460) */ 700 struct tcphdr tcp; 701 } h; 702 703 h.ip.saddr = *saddr; 704 h.ip.daddr = *daddr; 705 h.ip.protocol = cpu_to_be32(IPPROTO_TCP); 706 h.ip.len = cpu_to_be32(nbytes); 707 h.tcp = *th; 708 h.tcp.check = 0; 709 md5_update(ctx, (const u8 *)&h, sizeof(h.ip) + sizeof(h.tcp)); 710 } 711 712 static noinline_for_stack void 713 tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, 714 const struct in6_addr *daddr, struct in6_addr *saddr, 715 const struct tcphdr *th) 716 { 717 struct md5_ctx ctx; 718 719 md5_init(&ctx); 720 tcp_v6_md5_hash_headers(&ctx, daddr, saddr, th, th->doff << 2); 721 tcp_md5_hash_key(&ctx, key); 722 md5_final(&ctx, md5_hash); 723 } 724 725 static noinline_for_stack void 726 tcp_v6_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key, 727 const struct sock *sk, const struct sk_buff *skb) 728 { 729 const struct tcphdr *th = tcp_hdr(skb); 730 const struct in6_addr *saddr, *daddr; 731 struct md5_ctx ctx; 732 733 if (sk) { /* valid for establish/request sockets */ 734 saddr = &sk->sk_v6_rcv_saddr; 735 daddr = &sk->sk_v6_daddr; 736 } else { 737 const struct ipv6hdr *ip6h = ipv6_hdr(skb); 738 saddr = &ip6h->saddr; 739 daddr = &ip6h->daddr; 740 } 741 742 md5_init(&ctx); 743 tcp_v6_md5_hash_headers(&ctx, daddr, saddr, th, skb->len); 744 tcp_md5_hash_skb_data(&ctx, skb, th->doff << 2); 745 tcp_md5_hash_key(&ctx, key); 746 md5_final(&ctx, md5_hash); 747 } 748 #endif 749 750 static void tcp_v6_init_req(struct request_sock *req, 751 const struct sock *sk_listener, 752 struct sk_buff *skb, 753 u32 tw_isn) 754 { 755 bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags); 756 struct inet_request_sock *ireq = inet_rsk(req); 757 const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener); 758 759 ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; 760 ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; 761 ireq->ir_rmt_addr = LOOPBACK4_IPV6; 762 ireq->ir_loc_addr = LOOPBACK4_IPV6; 763 764 /* So that link locals have meaning */ 765 if ((!sk_listener->sk_bound_dev_if || l3_slave) && 766 ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) 767 ireq->ir_iif = tcp_v6_iif(skb); 768 769 if (!tw_isn && 770 (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) || 771 np->rxopt.bits.rxinfo || 772 np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || 773 np->rxopt.bits.rxohlim || inet6_test_bit(REPFLOW, sk_listener))) { 774 refcount_inc(&skb->users); 775 ireq->pktopts = skb; 776 } 777 } 778 779 static struct dst_entry *tcp_v6_route_req(const struct sock *sk, 780 struct sk_buff *skb, 781 struct flowi *fl, 782 struct request_sock *req, 783 u32 tw_isn) 784 { 785 tcp_v6_init_req(req, sk, skb, tw_isn); 786 787 if (security_inet_conn_request(sk, skb, req)) 788 return NULL; 789 790 return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP); 791 } 792 793 struct request_sock_ops tcp6_request_sock_ops __read_mostly = { 794 .family = AF_INET6, 795 .obj_size = sizeof(struct tcp6_request_sock), 796 .send_ack = tcp_v6_reqsk_send_ack, 797 .destructor = tcp_v6_reqsk_destructor, 798 .send_reset = tcp_v6_send_reset, 799 .syn_ack_timeout = tcp_syn_ack_timeout, 800 }; 
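/* Illustrative userspace sketch (not part of the kernel sources): the MD5
 * keys parsed by tcp_v6_parse_md5_keys() above are installed by applications
 * through setsockopt(TCP_MD5SIG) or setsockopt(TCP_MD5SIG_EXT) on an IPv6
 * TCP socket, along these lines (peer address and key are made-up examples):
 *
 *	struct tcp_md5sig md5 = {};
 *	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&md5.tcpm_addr;
 *
 *	sin6->sin6_family = AF_INET6;
 *	inet_pton(AF_INET6, "2001:db8::1", &sin6->sin6_addr);
 *	md5.tcpm_keylen = 11;
 *	memcpy(md5.tcpm_key, "example-key", md5.tcpm_keylen);
 *	setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
 *
 * With TCP_MD5SIG_EXT, tcpm_flags, tcpm_prefixlen and tcpm_ifindex select the
 * prefix-based or L3-master-bound keys handled by the checks above.
 */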
801 802 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { 803 .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - 804 sizeof(struct ipv6hdr), 805 #ifdef CONFIG_TCP_MD5SIG 806 .req_md5_lookup = tcp_v6_md5_lookup, 807 .calc_md5_hash = tcp_v6_md5_hash_skb, 808 #endif 809 #ifdef CONFIG_TCP_AO 810 .ao_lookup = tcp_v6_ao_lookup_rsk, 811 .ao_calc_key = tcp_v6_ao_calc_key_rsk, 812 .ao_synack_hash = tcp_v6_ao_synack_hash, 813 #endif 814 #ifdef CONFIG_SYN_COOKIES 815 .cookie_init_seq = cookie_v6_init_sequence, 816 #endif 817 .route_req = tcp_v6_route_req, 818 .init_seq = tcp_v6_init_seq, 819 .init_ts_off = tcp_v6_init_ts_off, 820 .send_synack = tcp_v6_send_synack, 821 }; 822 823 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq, 824 u32 ack, u32 win, u32 tsval, u32 tsecr, 825 int oif, int rst, u8 tclass, __be32 label, 826 u32 priority, u32 txhash, struct tcp_key *key) 827 { 828 struct net *net = sk ? sock_net(sk) : skb_dst_dev_net_rcu(skb); 829 unsigned int tot_len = sizeof(struct tcphdr); 830 struct sock *ctl_sk = net->ipv6.tcp_sk; 831 const struct tcphdr *th = tcp_hdr(skb); 832 __be32 mrst = 0, *topt; 833 struct dst_entry *dst; 834 struct sk_buff *buff; 835 struct tcphdr *t1; 836 struct flowi6 fl6; 837 u32 mark = 0; 838 839 if (tsecr) 840 tot_len += TCPOLEN_TSTAMP_ALIGNED; 841 if (tcp_key_is_md5(key)) 842 tot_len += TCPOLEN_MD5SIG_ALIGNED; 843 if (tcp_key_is_ao(key)) 844 tot_len += tcp_ao_len_aligned(key->ao_key); 845 846 #ifdef CONFIG_MPTCP 847 if (rst && !tcp_key_is_md5(key)) { 848 mrst = mptcp_reset_option(skb); 849 850 if (mrst) 851 tot_len += sizeof(__be32); 852 } 853 #endif 854 855 buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC); 856 if (!buff) 857 return; 858 859 skb_reserve(buff, MAX_TCP_HEADER); 860 861 t1 = skb_push(buff, tot_len); 862 skb_reset_transport_header(buff); 863 864 /* Swap the send and the receive. 
*/ 865 memset(t1, 0, sizeof(*t1)); 866 t1->dest = th->source; 867 t1->source = th->dest; 868 t1->doff = tot_len / 4; 869 t1->seq = htonl(seq); 870 t1->ack_seq = htonl(ack); 871 t1->ack = !rst || !th->ack; 872 t1->rst = rst; 873 t1->window = htons(win); 874 875 topt = (__be32 *)(t1 + 1); 876 877 if (tsecr) { 878 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | 879 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); 880 *topt++ = htonl(tsval); 881 *topt++ = htonl(tsecr); 882 } 883 884 if (mrst) 885 *topt++ = mrst; 886 887 #ifdef CONFIG_TCP_MD5SIG 888 if (tcp_key_is_md5(key)) { 889 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | 890 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG); 891 tcp_v6_md5_hash_hdr((__u8 *)topt, key->md5_key, 892 &ipv6_hdr(skb)->saddr, 893 &ipv6_hdr(skb)->daddr, t1); 894 } 895 #endif 896 #ifdef CONFIG_TCP_AO 897 if (tcp_key_is_ao(key)) { 898 *topt++ = htonl((TCPOPT_AO << 24) | 899 (tcp_ao_len(key->ao_key) << 16) | 900 (key->ao_key->sndid << 8) | 901 (key->rcv_next)); 902 903 tcp_ao_hash_hdr(AF_INET6, (char *)topt, key->ao_key, 904 key->traffic_key, 905 (union tcp_ao_addr *)&ipv6_hdr(skb)->saddr, 906 (union tcp_ao_addr *)&ipv6_hdr(skb)->daddr, 907 t1, key->sne); 908 } 909 #endif 910 911 memset(&fl6, 0, sizeof(fl6)); 912 fl6.daddr = ipv6_hdr(skb)->saddr; 913 fl6.saddr = ipv6_hdr(skb)->daddr; 914 fl6.flowlabel = label; 915 916 buff->ip_summed = CHECKSUM_PARTIAL; 917 918 __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr); 919 920 fl6.flowi6_proto = IPPROTO_TCP; 921 if (rt6_need_strict(&fl6.daddr) && !oif) 922 fl6.flowi6_oif = tcp_v6_iif(skb); 923 else { 924 if (!oif && netif_index_is_l3_master(net, skb->skb_iif)) 925 oif = skb->skb_iif; 926 927 fl6.flowi6_oif = oif; 928 } 929 930 if (sk) { 931 /* unconstify the socket only to attach it to buff with care. */ 932 skb_set_owner_edemux(buff, (struct sock *)sk); 933 psp_reply_set_decrypted(sk, buff); 934 935 if (sk->sk_state == TCP_TIME_WAIT) 936 mark = inet_twsk(sk)->tw_mark; 937 else 938 mark = READ_ONCE(sk->sk_mark); 939 skb_set_delivery_time(buff, tcp_transmit_time(sk), SKB_CLOCK_MONOTONIC); 940 } 941 if (txhash) { 942 /* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */ 943 skb_set_hash(buff, txhash, PKT_HASH_TYPE_L4); 944 } 945 fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark; 946 fl6.fl6_dport = t1->dest; 947 fl6.fl6_sport = t1->source; 948 fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? 
sk : NULL); 949 security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6)); 950 951 /* Pass a socket to ip6_dst_lookup either it is for RST 952 * Underlying function will use this to retrieve the network 953 * namespace 954 */ 955 if (sk && sk->sk_state != TCP_TIME_WAIT) 956 dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /*sk's xfrm_policy can be referred*/ 957 else 958 dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL); 959 if (!IS_ERR(dst)) { 960 skb_dst_set(buff, dst); 961 ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, 962 tclass, priority); 963 TCP_INC_STATS(net, TCP_MIB_OUTSEGS); 964 if (rst) 965 TCP_INC_STATS(net, TCP_MIB_OUTRSTS); 966 return; 967 } 968 969 kfree_skb(buff); 970 } 971 972 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb, 973 enum sk_rst_reason reason) 974 { 975 const struct tcphdr *th = tcp_hdr(skb); 976 struct ipv6hdr *ipv6h = ipv6_hdr(skb); 977 const __u8 *md5_hash_location = NULL; 978 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) 979 bool allocated_traffic_key = false; 980 #endif 981 const struct tcp_ao_hdr *aoh; 982 struct tcp_key key = {}; 983 u32 seq = 0, ack_seq = 0; 984 __be32 label = 0; 985 u32 priority = 0; 986 struct net *net; 987 u32 txhash = 0; 988 int oif = 0; 989 #ifdef CONFIG_TCP_MD5SIG 990 unsigned char newhash[16]; 991 struct sock *sk1 = NULL; 992 #endif 993 994 if (th->rst) 995 return; 996 997 /* If sk not NULL, it means we did a successful lookup and incoming 998 * route had to be correct. prequeue might have dropped our dst. 999 */ 1000 if (!sk && !ipv6_unicast_destination(skb)) 1001 return; 1002 1003 net = sk ? sock_net(sk) : skb_dst_dev_net_rcu(skb); 1004 /* Invalid TCP option size or twice included auth */ 1005 if (tcp_parse_auth_options(th, &md5_hash_location, &aoh)) 1006 return; 1007 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) 1008 rcu_read_lock(); 1009 #endif 1010 #ifdef CONFIG_TCP_MD5SIG 1011 if (sk && sk_fullsock(sk)) { 1012 int l3index; 1013 1014 /* sdif set, means packet ingressed via a device 1015 * in an L3 domain and inet_iif is set to it. 1016 */ 1017 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; 1018 key.md5_key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index); 1019 if (key.md5_key) 1020 key.type = TCP_KEY_MD5; 1021 } else if (md5_hash_location) { 1022 int dif = tcp_v6_iif_l3_slave(skb); 1023 int sdif = tcp_v6_sdif(skb); 1024 int l3index; 1025 1026 /* 1027 * active side is lost. Try to find listening socket through 1028 * source port, and then find md5 key through listening socket. 1029 * we are not loose security here: 1030 * Incoming packet is checked with md5 hash with finding key, 1031 * no RST generated if md5 hash doesn't match. 1032 */ 1033 sk1 = inet6_lookup_listener(net, NULL, 0, &ipv6h->saddr, th->source, 1034 &ipv6h->daddr, ntohs(th->source), 1035 dif, sdif); 1036 if (!sk1) 1037 goto out; 1038 1039 /* sdif set, means packet ingressed via a device 1040 * in an L3 domain and dif is set to it. 1041 */ 1042 l3index = tcp_v6_sdif(skb) ? 
dif : 0; 1043 1044 key.md5_key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index); 1045 if (!key.md5_key) 1046 goto out; 1047 key.type = TCP_KEY_MD5; 1048 1049 tcp_v6_md5_hash_skb(newhash, key.md5_key, NULL, skb); 1050 if (memcmp(md5_hash_location, newhash, 16) != 0) 1051 goto out; 1052 } 1053 #endif 1054 1055 if (th->ack) 1056 seq = ntohl(th->ack_seq); 1057 else 1058 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len - 1059 (th->doff << 2); 1060 1061 #ifdef CONFIG_TCP_AO 1062 if (aoh) { 1063 int l3index; 1064 1065 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; 1066 if (tcp_ao_prepare_reset(sk, skb, aoh, l3index, seq, 1067 &key.ao_key, &key.traffic_key, 1068 &allocated_traffic_key, 1069 &key.rcv_next, &key.sne)) 1070 goto out; 1071 key.type = TCP_KEY_AO; 1072 } 1073 #endif 1074 1075 if (sk) { 1076 oif = sk->sk_bound_dev_if; 1077 if (sk_fullsock(sk)) { 1078 if (inet6_test_bit(REPFLOW, sk)) 1079 label = ip6_flowlabel(ipv6h); 1080 priority = READ_ONCE(sk->sk_priority); 1081 txhash = sk->sk_txhash; 1082 } 1083 if (sk->sk_state == TCP_TIME_WAIT) { 1084 label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel); 1085 priority = inet_twsk(sk)->tw_priority; 1086 txhash = inet_twsk(sk)->tw_txhash; 1087 } 1088 } else { 1089 if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET) 1090 label = ip6_flowlabel(ipv6h); 1091 } 1092 1093 trace_tcp_send_reset(sk, skb, reason); 1094 1095 tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, 1, 1096 ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK, 1097 label, priority, txhash, 1098 &key); 1099 1100 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) 1101 out: 1102 if (allocated_traffic_key) 1103 kfree(key.traffic_key); 1104 rcu_read_unlock(); 1105 #endif 1106 } 1107 1108 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq, 1109 u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, 1110 struct tcp_key *key, u8 tclass, 1111 __be32 label, u32 priority, u32 txhash) 1112 { 1113 tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, 0, 1114 tclass, label, priority, txhash, key); 1115 } 1116 1117 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb, 1118 enum tcp_tw_status tw_status) 1119 { 1120 struct inet_timewait_sock *tw = inet_twsk(sk); 1121 struct tcp_timewait_sock *tcptw = tcp_twsk(sk); 1122 u8 tclass = tw->tw_tclass; 1123 struct tcp_key key = {}; 1124 1125 if (tw_status == TCP_TW_ACK_OOW) 1126 tclass &= ~INET_ECN_MASK; 1127 #ifdef CONFIG_TCP_AO 1128 struct tcp_ao_info *ao_info; 1129 1130 if (static_branch_unlikely(&tcp_ao_needed.key)) { 1131 1132 /* FIXME: the segment to-be-acked is not verified yet */ 1133 ao_info = rcu_dereference(tcptw->ao_info); 1134 if (ao_info) { 1135 const struct tcp_ao_hdr *aoh; 1136 1137 /* Invalid TCP option size or twice included auth */ 1138 if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh)) 1139 goto out; 1140 if (aoh) 1141 key.ao_key = tcp_ao_established_key(sk, ao_info, 1142 aoh->rnext_keyid, -1); 1143 } 1144 } 1145 if (key.ao_key) { 1146 struct tcp_ao_key *rnext_key; 1147 1148 key.traffic_key = snd_other_key(key.ao_key); 1149 /* rcv_next switches to our rcv_next */ 1150 rnext_key = READ_ONCE(ao_info->rnext_key); 1151 key.rcv_next = rnext_key->rcvid; 1152 key.sne = READ_ONCE(ao_info->snd_sne); 1153 key.type = TCP_KEY_AO; 1154 #else 1155 if (0) { 1156 #endif 1157 #ifdef CONFIG_TCP_MD5SIG 1158 } else if (static_branch_unlikely(&tcp_md5_needed.key)) { 1159 key.md5_key = tcp_twsk_md5_key(tcptw); 1160 if (key.md5_key) 1161 key.type = TCP_KEY_MD5; 1162 #endif 
1163 } 1164 1165 tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, 1166 READ_ONCE(tcptw->tw_rcv_nxt), 1167 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, 1168 tcp_tw_tsval(tcptw), 1169 READ_ONCE(tcptw->tw_ts_recent), tw->tw_bound_dev_if, 1170 &key, tclass, cpu_to_be32(tw->tw_flowlabel), 1171 tw->tw_priority, tw->tw_txhash); 1172 1173 #ifdef CONFIG_TCP_AO 1174 out: 1175 #endif 1176 inet_twsk_put(tw); 1177 } 1178 1179 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, 1180 struct request_sock *req) 1181 { 1182 struct tcp_key key = {}; 1183 1184 #ifdef CONFIG_TCP_AO 1185 if (static_branch_unlikely(&tcp_ao_needed.key) && 1186 tcp_rsk_used_ao(req)) { 1187 const struct in6_addr *addr = &ipv6_hdr(skb)->saddr; 1188 const struct tcp_ao_hdr *aoh; 1189 int l3index; 1190 1191 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; 1192 /* Invalid TCP option size or twice included auth */ 1193 if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh)) 1194 return; 1195 if (!aoh) 1196 return; 1197 key.ao_key = tcp_ao_do_lookup(sk, l3index, 1198 (union tcp_ao_addr *)addr, 1199 AF_INET6, aoh->rnext_keyid, -1); 1200 if (unlikely(!key.ao_key)) { 1201 /* Send ACK with any matching MKT for the peer */ 1202 key.ao_key = tcp_ao_do_lookup(sk, l3index, 1203 (union tcp_ao_addr *)addr, 1204 AF_INET6, -1, -1); 1205 /* Matching key disappeared (user removed the key?) 1206 * let the handshake timeout. 1207 */ 1208 if (!key.ao_key) { 1209 net_info_ratelimited("TCP-AO key for (%pI6, %d)->(%pI6, %d) suddenly disappeared, won't ACK new connection\n", 1210 addr, 1211 ntohs(tcp_hdr(skb)->source), 1212 &ipv6_hdr(skb)->daddr, 1213 ntohs(tcp_hdr(skb)->dest)); 1214 return; 1215 } 1216 } 1217 key.traffic_key = kmalloc(tcp_ao_digest_size(key.ao_key), GFP_ATOMIC); 1218 if (!key.traffic_key) 1219 return; 1220 1221 key.type = TCP_KEY_AO; 1222 key.rcv_next = aoh->keyid; 1223 tcp_v6_ao_calc_key_rsk(key.ao_key, key.traffic_key, req); 1224 #else 1225 if (0) { 1226 #endif 1227 #ifdef CONFIG_TCP_MD5SIG 1228 } else if (static_branch_unlikely(&tcp_md5_needed.key)) { 1229 int l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; 1230 1231 key.md5_key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, 1232 l3index); 1233 if (key.md5_key) 1234 key.type = TCP_KEY_MD5; 1235 #endif 1236 } 1237 1238 /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV 1239 * sk->sk_state == TCP_SYN_RECV -> for Fast Open. 1240 */ 1241 tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ? 
1242 tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, 1243 tcp_rsk(req)->rcv_nxt, 1244 tcp_synack_window(req) >> inet_rsk(req)->rcv_wscale, 1245 tcp_rsk_tsval(tcp_rsk(req)), 1246 req->ts_recent, sk->sk_bound_dev_if, 1247 &key, ipv6_get_dsfield(ipv6_hdr(skb)) & ~INET_ECN_MASK, 1248 0, 1249 READ_ONCE(sk->sk_priority), 1250 READ_ONCE(tcp_rsk(req)->txhash)); 1251 if (tcp_key_is_ao(&key)) 1252 kfree(key.traffic_key); 1253 } 1254 1255 1256 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb) 1257 { 1258 #ifdef CONFIG_SYN_COOKIES 1259 const struct tcphdr *th = tcp_hdr(skb); 1260 1261 if (!th->syn) 1262 sk = cookie_v6_check(sk, skb); 1263 #endif 1264 return sk; 1265 } 1266 1267 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph, 1268 struct tcphdr *th, u32 *cookie) 1269 { 1270 u16 mss = 0; 1271 #ifdef CONFIG_SYN_COOKIES 1272 mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops, 1273 &tcp_request_sock_ipv6_ops, sk, th); 1274 if (mss) { 1275 *cookie = __cookie_v6_init_sequence(iph, th, &mss); 1276 tcp_synq_overflow(sk); 1277 } 1278 #endif 1279 return mss; 1280 } 1281 1282 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) 1283 { 1284 if (skb->protocol == htons(ETH_P_IP)) 1285 return tcp_v4_conn_request(sk, skb); 1286 1287 if (!ipv6_unicast_destination(skb)) 1288 goto drop; 1289 1290 if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) { 1291 __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS); 1292 return 0; 1293 } 1294 1295 return tcp_conn_request(&tcp6_request_sock_ops, 1296 &tcp_request_sock_ipv6_ops, sk, skb); 1297 1298 drop: 1299 tcp_listendrop(sk); 1300 return 0; /* don't send reset */ 1301 } 1302 1303 static void tcp_v6_restore_cb(struct sk_buff *skb) 1304 { 1305 /* We need to move header back to the beginning if xfrm6_policy_check() 1306 * and tcp_v6_fill_cb() are going to be called again. 1307 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there. 
1308 */ 1309 memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6, 1310 sizeof(struct inet6_skb_parm)); 1311 } 1312 1313 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, 1314 struct request_sock *req, 1315 struct dst_entry *dst, 1316 struct request_sock *req_unhash, 1317 bool *own_req) 1318 { 1319 struct inet_request_sock *ireq; 1320 struct ipv6_pinfo *newnp; 1321 const struct ipv6_pinfo *np = tcp_inet6_sk(sk); 1322 struct ipv6_txoptions *opt; 1323 struct inet_sock *newinet; 1324 bool found_dup_sk = false; 1325 struct tcp_sock *newtp; 1326 struct sock *newsk; 1327 #ifdef CONFIG_TCP_MD5SIG 1328 struct tcp_md5sig_key *key; 1329 int l3index; 1330 #endif 1331 struct flowi6 fl6; 1332 1333 if (skb->protocol == htons(ETH_P_IP)) { 1334 /* 1335 * v6 mapped 1336 */ 1337 1338 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst, 1339 req_unhash, own_req); 1340 1341 if (!newsk) 1342 return NULL; 1343 1344 newinet = inet_sk(newsk); 1345 newinet->pinet6 = tcp_inet6_sk(newsk); 1346 newinet->ipv6_fl_list = NULL; 1347 1348 newnp = tcp_inet6_sk(newsk); 1349 newtp = tcp_sk(newsk); 1350 1351 memcpy(newnp, np, sizeof(struct ipv6_pinfo)); 1352 1353 newnp->saddr = newsk->sk_v6_rcv_saddr; 1354 1355 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped; 1356 if (sk_is_mptcp(newsk)) 1357 mptcpv6_handle_mapped(newsk, true); 1358 newsk->sk_backlog_rcv = tcp_v4_do_rcv; 1359 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) 1360 newtp->af_specific = &tcp_sock_ipv6_mapped_specific; 1361 #endif 1362 1363 newnp->ipv6_mc_list = NULL; 1364 newnp->ipv6_ac_list = NULL; 1365 newnp->pktoptions = NULL; 1366 newnp->opt = NULL; 1367 newnp->mcast_oif = inet_iif(skb); 1368 newnp->mcast_hops = ip_hdr(skb)->ttl; 1369 newnp->rcv_flowinfo = 0; 1370 if (inet6_test_bit(REPFLOW, sk)) 1371 newnp->flow_label = 0; 1372 1373 /* 1374 * No need to charge this sock to the relevant IPv6 refcnt debug socks count 1375 * here, tcp_create_openreq_child now does this for us, see the comment in 1376 * that function for the gory details. -acme 1377 */ 1378 1379 /* It is tricky place. Until this moment IPv4 tcp 1380 worked with IPv6 icsk.icsk_af_ops. 1381 Sync it now. 1382 */ 1383 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie); 1384 1385 return newsk; 1386 } 1387 1388 ireq = inet_rsk(req); 1389 1390 if (sk_acceptq_is_full(sk)) 1391 goto exit_overflow; 1392 1393 if (!dst) { 1394 dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP); 1395 if (!dst) 1396 goto exit; 1397 } 1398 1399 newsk = tcp_create_openreq_child(sk, req, skb); 1400 if (!newsk) 1401 goto exit_nonewsk; 1402 1403 /* 1404 * No need to charge this sock to the relevant IPv6 refcnt debug socks 1405 * count here, tcp_create_openreq_child now does this for us, see the 1406 * comment in that function for the gory details. -acme 1407 */ 1408 1409 newsk->sk_gso_type = SKB_GSO_TCPV6; 1410 inet6_sk_rx_dst_set(newsk, skb); 1411 1412 newinet = inet_sk(newsk); 1413 newinet->pinet6 = tcp_inet6_sk(newsk); 1414 newinet->ipv6_fl_list = NULL; 1415 newinet->inet_opt = NULL; 1416 1417 newtp = tcp_sk(newsk); 1418 newnp = tcp_inet6_sk(newsk); 1419 1420 memcpy(newnp, np, sizeof(struct ipv6_pinfo)); 1421 1422 ip6_dst_store(newsk, dst, false, false); 1423 1424 newnp->saddr = ireq->ir_v6_loc_addr; 1425 1426 /* Now IPv6 options... 1427 1428 First: no IPv4 options. 
1429 */ 1430 newnp->ipv6_mc_list = NULL; 1431 newnp->ipv6_ac_list = NULL; 1432 1433 /* Clone RX bits */ 1434 newnp->rxopt.all = np->rxopt.all; 1435 1436 newnp->pktoptions = NULL; 1437 newnp->opt = NULL; 1438 newnp->mcast_oif = tcp_v6_iif(skb); 1439 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; 1440 newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb)); 1441 if (inet6_test_bit(REPFLOW, sk)) 1442 newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb)); 1443 1444 /* Set ToS of the new socket based upon the value of incoming SYN. 1445 * ECT bits are set later in tcp_init_transfer(). 1446 */ 1447 if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)) 1448 newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK; 1449 1450 /* Clone native IPv6 options from listening socket (if any) 1451 1452 Yes, keeping reference count would be much more clever, 1453 but we make one more one thing there: reattach optmem 1454 to newsk. 1455 */ 1456 opt = ireq->ipv6_opt; 1457 if (!opt) 1458 opt = rcu_dereference(np->opt); 1459 if (opt) { 1460 opt = ipv6_dup_options(newsk, opt); 1461 RCU_INIT_POINTER(newnp->opt, opt); 1462 } 1463 inet_csk(newsk)->icsk_ext_hdr_len = 0; 1464 if (opt) 1465 inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen + 1466 opt->opt_flen; 1467 1468 tcp_ca_openreq_child(newsk, dst); 1469 1470 tcp_sync_mss(newsk, dst_mtu(dst)); 1471 newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst)); 1472 1473 tcp_initialize_rcv_mss(newsk); 1474 1475 #ifdef CONFIG_TCP_MD5SIG 1476 l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif); 1477 1478 if (!tcp_rsk_used_ao(req)) { 1479 /* Copy over the MD5 key from the original socket */ 1480 key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index); 1481 if (key) { 1482 const union tcp_md5_addr *addr; 1483 1484 addr = (union tcp_md5_addr *)&newsk->sk_v6_daddr; 1485 if (tcp_md5_key_copy(newsk, addr, AF_INET6, 128, l3index, key)) 1486 goto put_and_exit; 1487 } 1488 } 1489 #endif 1490 #ifdef CONFIG_TCP_AO 1491 /* Copy over tcp_ao_info if any */ 1492 if (tcp_ao_copy_all_matching(sk, newsk, req, skb, AF_INET6)) 1493 goto put_and_exit; /* OOM */ 1494 #endif 1495 1496 if (__inet_inherit_port(sk, newsk) < 0) 1497 goto put_and_exit; 1498 *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), 1499 &found_dup_sk); 1500 if (*own_req) { 1501 tcp_move_syn(newtp, req); 1502 1503 /* Clone pktoptions received with SYN, if we own the req */ 1504 if (ireq->pktopts) { 1505 newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk); 1506 consume_skb(ireq->pktopts); 1507 ireq->pktopts = NULL; 1508 if (newnp->pktoptions) 1509 tcp_v6_restore_cb(newnp->pktoptions); 1510 } 1511 } else { 1512 if (!req_unhash && found_dup_sk) { 1513 /* This code path should only be executed in the 1514 * syncookie case only 1515 */ 1516 bh_unlock_sock(newsk); 1517 sock_put(newsk); 1518 newsk = NULL; 1519 } 1520 } 1521 1522 return newsk; 1523 1524 exit_overflow: 1525 __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); 1526 exit_nonewsk: 1527 dst_release(dst); 1528 exit: 1529 tcp_listendrop(sk); 1530 return NULL; 1531 put_and_exit: 1532 inet_csk_prepare_forced_close(newsk); 1533 tcp_done(newsk); 1534 goto exit; 1535 } 1536 1537 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *, 1538 u32)); 1539 /* The socket must have it's spinlock held when we get 1540 * here, unless it is a TCP_LISTEN socket. 1541 * 1542 * We have a potential double-lock case here, so even when 1543 * doing backlog processing we use the BH locking scheme. 
1544 * This is because we cannot sleep with the original spinlock 1545 * held. 1546 */ 1547 INDIRECT_CALLABLE_SCOPE 1548 int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) 1549 { 1550 struct ipv6_pinfo *np = tcp_inet6_sk(sk); 1551 struct sk_buff *opt_skb = NULL; 1552 enum skb_drop_reason reason; 1553 struct tcp_sock *tp; 1554 1555 /* Imagine: socket is IPv6. IPv4 packet arrives, 1556 goes to IPv4 receive handler and backlogged. 1557 From backlog it always goes here. Kerboom... 1558 Fortunately, tcp_rcv_established and rcv_established 1559 handle them correctly, but it is not case with 1560 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK 1561 */ 1562 1563 if (skb->protocol == htons(ETH_P_IP)) 1564 return tcp_v4_do_rcv(sk, skb); 1565 1566 reason = psp_sk_rx_policy_check(sk, skb); 1567 if (reason) 1568 goto err_discard; 1569 1570 /* 1571 * socket locking is here for SMP purposes as backlog rcv 1572 * is currently called with bh processing disabled. 1573 */ 1574 1575 /* Do Stevens' IPV6_PKTOPTIONS. 1576 1577 Yes, guys, it is the only place in our code, where we 1578 may make it not affecting IPv4. 1579 The rest of code is protocol independent, 1580 and I do not like idea to uglify IPv4. 1581 1582 Actually, all the idea behind IPV6_PKTOPTIONS 1583 looks not very well thought. For now we latch 1584 options, received in the last packet, enqueued 1585 by tcp. Feel free to propose better solution. 1586 --ANK (980728) 1587 */ 1588 if (np->rxopt.all && sk->sk_state != TCP_LISTEN) 1589 opt_skb = skb_clone_and_charge_r(skb, sk); 1590 1591 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ 1592 struct dst_entry *dst; 1593 1594 dst = rcu_dereference_protected(sk->sk_rx_dst, 1595 lockdep_sock_is_held(sk)); 1596 1597 sock_rps_save_rxhash(sk, skb); 1598 sk_mark_napi_id(sk, skb); 1599 if (dst) { 1600 if (sk->sk_rx_dst_ifindex != skb->skb_iif || 1601 INDIRECT_CALL_1(dst->ops->check, ip6_dst_check, 1602 dst, sk->sk_rx_dst_cookie) == NULL) { 1603 RCU_INIT_POINTER(sk->sk_rx_dst, NULL); 1604 dst_release(dst); 1605 } 1606 } 1607 1608 tcp_rcv_established(sk, skb); 1609 if (opt_skb) 1610 goto ipv6_pktoptions; 1611 return 0; 1612 } 1613 1614 if (tcp_checksum_complete(skb)) 1615 goto csum_err; 1616 1617 if (sk->sk_state == TCP_LISTEN) { 1618 struct sock *nsk = tcp_v6_cookie_check(sk, skb); 1619 1620 if (nsk != sk) { 1621 if (nsk) { 1622 reason = tcp_child_process(sk, nsk, skb); 1623 if (reason) 1624 goto reset; 1625 } 1626 return 0; 1627 } 1628 } else 1629 sock_rps_save_rxhash(sk, skb); 1630 1631 reason = tcp_rcv_state_process(sk, skb); 1632 if (reason) 1633 goto reset; 1634 if (opt_skb) 1635 goto ipv6_pktoptions; 1636 return 0; 1637 1638 reset: 1639 tcp_v6_send_reset(sk, skb, sk_rst_convert_drop_reason(reason)); 1640 discard: 1641 if (opt_skb) 1642 __kfree_skb(opt_skb); 1643 sk_skb_reason_drop(sk, skb, reason); 1644 return 0; 1645 csum_err: 1646 reason = SKB_DROP_REASON_TCP_CSUM; 1647 trace_tcp_bad_csum(skb); 1648 TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS); 1649 err_discard: 1650 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); 1651 goto discard; 1652 1653 1654 ipv6_pktoptions: 1655 /* Do you ask, what is it? 1656 1657 1. skb was enqueued by tcp. 1658 2. skb is added to tail of read queue, rather than out of order. 1659 3. socket is not in passive state. 1660 4. Finally, it really contains options, which user wants to receive. 
1661 */ 1662 tp = tcp_sk(sk); 1663 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt && 1664 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) { 1665 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo) 1666 WRITE_ONCE(np->mcast_oif, tcp_v6_iif(opt_skb)); 1667 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) 1668 WRITE_ONCE(np->mcast_hops, 1669 ipv6_hdr(opt_skb)->hop_limit); 1670 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass) 1671 np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb)); 1672 if (inet6_test_bit(REPFLOW, sk)) 1673 np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb)); 1674 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) { 1675 tcp_v6_restore_cb(opt_skb); 1676 opt_skb = xchg(&np->pktoptions, opt_skb); 1677 } else { 1678 __kfree_skb(opt_skb); 1679 opt_skb = xchg(&np->pktoptions, NULL); 1680 } 1681 } 1682 1683 consume_skb(opt_skb); 1684 return 0; 1685 } 1686 1687 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr, 1688 const struct tcphdr *th) 1689 { 1690 /* This is tricky: we move IP6CB at its correct location into 1691 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because 1692 * _decode_session6() uses IP6CB(). 1693 * barrier() makes sure compiler won't play aliasing games. 1694 */ 1695 memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb), 1696 sizeof(struct inet6_skb_parm)); 1697 barrier(); 1698 1699 TCP_SKB_CB(skb)->seq = ntohl(th->seq); 1700 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + 1701 skb->len - th->doff*4); 1702 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); 1703 TCP_SKB_CB(skb)->tcp_flags = tcp_flags_ntohs(th); 1704 TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr); 1705 TCP_SKB_CB(skb)->sacked = 0; 1706 TCP_SKB_CB(skb)->has_rxtstamp = 1707 skb->tstamp || skb_hwtstamps(skb)->hwtstamp; 1708 } 1709 1710 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) 1711 { 1712 struct net *net = dev_net_rcu(skb->dev); 1713 enum skb_drop_reason drop_reason; 1714 enum tcp_tw_status tw_status; 1715 int sdif = inet6_sdif(skb); 1716 int dif = inet6_iif(skb); 1717 const struct tcphdr *th; 1718 const struct ipv6hdr *hdr; 1719 struct sock *sk = NULL; 1720 bool refcounted; 1721 int ret; 1722 u32 isn; 1723 1724 drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; 1725 if (skb->pkt_type != PACKET_HOST) 1726 goto discard_it; 1727 1728 /* 1729 * Count it even if it's bad. 
1730 */ 1731 __TCP_INC_STATS(net, TCP_MIB_INSEGS); 1732 1733 if (!pskb_may_pull(skb, sizeof(struct tcphdr))) 1734 goto discard_it; 1735 1736 th = (const struct tcphdr *)skb->data; 1737 1738 if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) { 1739 drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL; 1740 goto bad_packet; 1741 } 1742 if (!pskb_may_pull(skb, th->doff*4)) 1743 goto discard_it; 1744 1745 if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo)) 1746 goto csum_error; 1747 1748 th = (const struct tcphdr *)skb->data; 1749 hdr = ipv6_hdr(skb); 1750 1751 lookup: 1752 sk = __inet6_lookup_skb(skb, __tcp_hdrlen(th), 1753 th->source, th->dest, inet6_iif(skb), sdif, 1754 &refcounted); 1755 if (!sk) 1756 goto no_tcp_socket; 1757 1758 if (sk->sk_state == TCP_TIME_WAIT) 1759 goto do_time_wait; 1760 1761 if (sk->sk_state == TCP_NEW_SYN_RECV) { 1762 struct request_sock *req = inet_reqsk(sk); 1763 bool req_stolen = false; 1764 struct sock *nsk; 1765 1766 sk = req->rsk_listener; 1767 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) 1768 drop_reason = SKB_DROP_REASON_XFRM_POLICY; 1769 else 1770 drop_reason = tcp_inbound_hash(sk, req, skb, 1771 &hdr->saddr, &hdr->daddr, 1772 AF_INET6, dif, sdif); 1773 if (drop_reason) { 1774 sk_drops_skbadd(sk, skb); 1775 reqsk_put(req); 1776 goto discard_it; 1777 } 1778 if (tcp_checksum_complete(skb)) { 1779 reqsk_put(req); 1780 goto csum_error; 1781 } 1782 if (unlikely(sk->sk_state != TCP_LISTEN)) { 1783 nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb); 1784 if (!nsk) { 1785 inet_csk_reqsk_queue_drop_and_put(sk, req); 1786 goto lookup; 1787 } 1788 sk = nsk; 1789 /* reuseport_migrate_sock() has already held one sk_refcnt 1790 * before returning. 1791 */ 1792 } else { 1793 sock_hold(sk); 1794 } 1795 refcounted = true; 1796 nsk = NULL; 1797 if (!tcp_filter(sk, skb, &drop_reason)) { 1798 th = (const struct tcphdr *)skb->data; 1799 hdr = ipv6_hdr(skb); 1800 tcp_v6_fill_cb(skb, hdr, th); 1801 nsk = tcp_check_req(sk, skb, req, false, &req_stolen, 1802 &drop_reason); 1803 } 1804 if (!nsk) { 1805 reqsk_put(req); 1806 if (req_stolen) { 1807 /* Another cpu got exclusive access to req 1808 * and created a full blown socket. 1809 * Try to feed this packet to this socket 1810 * instead of discarding it. 
1811 */ 1812 tcp_v6_restore_cb(skb); 1813 sock_put(sk); 1814 goto lookup; 1815 } 1816 goto discard_and_relse; 1817 } 1818 nf_reset_ct(skb); 1819 if (nsk == sk) { 1820 reqsk_put(req); 1821 tcp_v6_restore_cb(skb); 1822 } else { 1823 drop_reason = tcp_child_process(sk, nsk, skb); 1824 if (drop_reason) { 1825 enum sk_rst_reason rst_reason; 1826 1827 rst_reason = sk_rst_convert_drop_reason(drop_reason); 1828 tcp_v6_send_reset(nsk, skb, rst_reason); 1829 goto discard_and_relse; 1830 } 1831 sock_put(sk); 1832 return 0; 1833 } 1834 } 1835 1836 process: 1837 if (static_branch_unlikely(&ip6_min_hopcount)) { 1838 /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */ 1839 if (unlikely(hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount))) { 1840 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); 1841 drop_reason = SKB_DROP_REASON_TCP_MINTTL; 1842 goto discard_and_relse; 1843 } 1844 } 1845 1846 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) { 1847 drop_reason = SKB_DROP_REASON_XFRM_POLICY; 1848 goto discard_and_relse; 1849 } 1850 1851 drop_reason = tcp_inbound_hash(sk, NULL, skb, &hdr->saddr, &hdr->daddr, 1852 AF_INET6, dif, sdif); 1853 if (drop_reason) 1854 goto discard_and_relse; 1855 1856 nf_reset_ct(skb); 1857 1858 if (tcp_filter(sk, skb, &drop_reason)) 1859 goto discard_and_relse; 1860 1861 th = (const struct tcphdr *)skb->data; 1862 hdr = ipv6_hdr(skb); 1863 tcp_v6_fill_cb(skb, hdr, th); 1864 1865 skb->dev = NULL; 1866 1867 if (sk->sk_state == TCP_LISTEN) { 1868 ret = tcp_v6_do_rcv(sk, skb); 1869 goto put_and_return; 1870 } 1871 1872 sk_incoming_cpu_update(sk); 1873 1874 bh_lock_sock_nested(sk); 1875 tcp_segs_in(tcp_sk(sk), skb); 1876 ret = 0; 1877 if (!sock_owned_by_user(sk)) { 1878 ret = tcp_v6_do_rcv(sk, skb); 1879 } else { 1880 if (tcp_add_backlog(sk, skb, &drop_reason)) 1881 goto discard_and_relse; 1882 } 1883 bh_unlock_sock(sk); 1884 put_and_return: 1885 if (refcounted) 1886 sock_put(sk); 1887 return ret ? 
-1 : 0; 1888 1889 no_tcp_socket: 1890 drop_reason = SKB_DROP_REASON_NO_SOCKET; 1891 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) 1892 goto discard_it; 1893 1894 tcp_v6_fill_cb(skb, hdr, th); 1895 1896 if (tcp_checksum_complete(skb)) { 1897 csum_error: 1898 drop_reason = SKB_DROP_REASON_TCP_CSUM; 1899 trace_tcp_bad_csum(skb); 1900 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS); 1901 bad_packet: 1902 __TCP_INC_STATS(net, TCP_MIB_INERRS); 1903 } else { 1904 tcp_v6_send_reset(NULL, skb, sk_rst_convert_drop_reason(drop_reason)); 1905 } 1906 1907 discard_it: 1908 SKB_DR_OR(drop_reason, NOT_SPECIFIED); 1909 sk_skb_reason_drop(sk, skb, drop_reason); 1910 return 0; 1911 1912 discard_and_relse: 1913 sk_drops_skbadd(sk, skb); 1914 if (refcounted) 1915 sock_put(sk); 1916 goto discard_it; 1917 1918 do_time_wait: 1919 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { 1920 drop_reason = SKB_DROP_REASON_XFRM_POLICY; 1921 inet_twsk_put(inet_twsk(sk)); 1922 goto discard_it; 1923 } 1924 1925 tcp_v6_fill_cb(skb, hdr, th); 1926 1927 if (tcp_checksum_complete(skb)) { 1928 inet_twsk_put(inet_twsk(sk)); 1929 goto csum_error; 1930 } 1931 1932 tw_status = tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn, 1933 &drop_reason); 1934 switch (tw_status) { 1935 case TCP_TW_SYN: 1936 { 1937 struct sock *sk2; 1938 1939 sk2 = inet6_lookup_listener(net, skb, __tcp_hdrlen(th), 1940 &ipv6_hdr(skb)->saddr, th->source, 1941 &ipv6_hdr(skb)->daddr, 1942 ntohs(th->dest), 1943 tcp_v6_iif_l3_slave(skb), 1944 sdif); 1945 if (sk2) { 1946 struct inet_timewait_sock *tw = inet_twsk(sk); 1947 inet_twsk_deschedule_put(tw); 1948 sk = sk2; 1949 tcp_v6_restore_cb(skb); 1950 refcounted = false; 1951 __this_cpu_write(tcp_tw_isn, isn); 1952 goto process; 1953 } 1954 1955 drop_reason = psp_twsk_rx_policy_check(inet_twsk(sk), skb); 1956 if (drop_reason) 1957 break; 1958 } 1959 /* to ACK */ 1960 fallthrough; 1961 case TCP_TW_ACK: 1962 case TCP_TW_ACK_OOW: 1963 tcp_v6_timewait_ack(sk, skb, tw_status); 1964 break; 1965 case TCP_TW_RST: 1966 tcp_v6_send_reset(sk, skb, SK_RST_REASON_TCP_TIMEWAIT_SOCKET); 1967 inet_twsk_deschedule_put(inet_twsk(sk)); 1968 goto discard_it; 1969 case TCP_TW_SUCCESS: 1970 ; 1971 } 1972 goto discard_it; 1973 } 1974 1975 void tcp_v6_early_demux(struct sk_buff *skb) 1976 { 1977 struct net *net = dev_net_rcu(skb->dev); 1978 const struct ipv6hdr *hdr; 1979 const struct tcphdr *th; 1980 struct sock *sk; 1981 1982 if (skb->pkt_type != PACKET_HOST) 1983 return; 1984 1985 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr))) 1986 return; 1987 1988 hdr = ipv6_hdr(skb); 1989 th = tcp_hdr(skb); 1990 1991 if (th->doff < sizeof(struct tcphdr) / 4) 1992 return; 1993 1994 /* Note : We use inet6_iif() here, not tcp_v6_iif() */ 1995 sk = __inet6_lookup_established(net, &hdr->saddr, th->source, 1996 &hdr->daddr, ntohs(th->dest), 1997 inet6_iif(skb), inet6_sdif(skb)); 1998 if (sk) { 1999 skb->sk = sk; 2000 skb->destructor = sock_edemux; 2001 if (sk_fullsock(sk)) { 2002 struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst); 2003 2004 if (dst) 2005 dst = dst_check(dst, sk->sk_rx_dst_cookie); 2006 if (dst && 2007 sk->sk_rx_dst_ifindex == skb->skb_iif) 2008 skb_dst_set_noref(skb, dst); 2009 } 2010 } 2011 } 2012 2013 static struct timewait_sock_ops tcp6_timewait_sock_ops = { 2014 .twsk_obj_size = sizeof(struct tcp6_timewait_sock), 2015 }; 2016 2017 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb) 2018 { 2019 __tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr); 2020 } 
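/* A note on tcp_v6_send_check() above: for locally generated segments the
 * final checksum is normally left to the device (CHECKSUM_PARTIAL, as also
 * used by tcp_v6_send_response()); the helper only seeds it with the IPv6
 * pseudo-header from RFC 2460, conceptually the same layout that is hashed
 * for MD5 earlier in this file:
 *
 *	struct tcp6_pseudohdr {
 *		struct in6_addr	saddr;
 *		struct in6_addr	daddr;
 *		__be32		len;		// TCP header plus payload length
 *		__be32		protocol;	// IPPROTO_TCP, 32-bit network order
 *	};
 *
 * The layout is shown here for orientation only; the authoritative
 * definition lives in the shared TCP headers.
 */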
const struct inet_connection_sock_af_ops ipv6_specific = {
	.queue_xmit	   = inet6_csk_xmit,
	.send_check	   = tcp_v6_send_check,
	.rebuild_header	   = inet6_sk_rebuild_header,
	.sk_rx_dst_set	   = inet6_sk_rx_dst_set,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.net_header_len	   = sizeof(struct ipv6hdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.mtu_reduced	   = tcp_v6_mtu_reduced,
};

#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
#ifdef CONFIG_TCP_MD5SIG
	.md5_lookup	= tcp_v6_md5_lookup,
	.calc_md5_hash	= tcp_v6_md5_hash_skb,
	.md5_parse	= tcp_v6_parse_md5_keys,
#endif
#ifdef CONFIG_TCP_AO
	.ao_lookup	= tcp_v6_ao_lookup,
	.calc_ao_hash	= tcp_v6_ao_hash_skb,
	.ao_parse	= tcp_v6_parse_ao,
	.ao_calc_key_sk	= tcp_v6_ao_calc_key_sk,
#endif
};
#endif

/*
 *	TCP over IPv4 via INET6 API
 */
static const struct inet_connection_sock_af_ops ipv6_mapped = {
	.queue_xmit	   = ip_queue_xmit,
	.send_check	   = tcp_v4_send_check,
	.rebuild_header	   = inet_sk_rebuild_header,
	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.mtu_reduced	   = tcp_v4_mtu_reduced,
};

#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
#ifdef CONFIG_TCP_MD5SIG
	.md5_lookup	= tcp_v4_md5_lookup,
	.calc_md5_hash	= tcp_v4_md5_hash_skb,
	.md5_parse	= tcp_v6_parse_md5_keys,
#endif
#ifdef CONFIG_TCP_AO
	.ao_lookup	= tcp_v6_ao_lookup,
	.calc_ao_hash	= tcp_v4_ao_hash_skb,
	.ao_parse	= tcp_v6_parse_ao,
	.ao_calc_key_sk	= tcp_v4_ao_calc_key_sk,
#endif
};

static void tcp6_destruct_sock(struct sock *sk)
{
	tcp_md5_destruct_sock(sk);
	tcp_ao_destroy_sock(sk, false);
	inet6_sock_destruct(sk);
}
#endif

/* NOTE: A lot of things set to zero explicitly by call to
 *       sk_alloc() so need not be done here.
 */
static int tcp_v6_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	tcp_init_sock(sk);

	icsk->icsk_af_ops = &ipv6_specific;

#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
	sk->sk_destruct = tcp6_destruct_sock;
#endif

	return 0;
}

#ifdef CONFIG_PROC_FS
/* Proc filesystem TCPv6 sock list dumping. */
static void get_openreq6(struct seq_file *seq,
			 const struct request_sock *req, int i)
{
	long ttd = req->rsk_timer.expires - jiffies;
	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;

	if (ttd < 0)
		ttd = 0;

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3],
		   inet_rsk(req)->ir_num,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3],
		   ntohs(inet_rsk(req)->ir_rmt_port),
		   TCP_SYN_RECV,
		   0, 0, /* could print option size, but that is af dependent. */
		   1,   /* timers active (only the expire timer) */
		   jiffies_to_clock_t(ttd),
		   req->num_timeout,
		   from_kuid_munged(seq_user_ns(seq),
				    sk_uid(req->rsk_listener)),
		   0,  /* non standard timer */
		   0, /* open_requests have no inode */
		   0, req);
}

static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int timer_active;
	unsigned long timer_expires;
	const struct inet_sock *inet = inet_sk(sp);
	const struct tcp_sock *tp = tcp_sk(sp);
	const struct inet_connection_sock *icsk = inet_csk(sp);
	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
	u8 icsk_pending;
	int rx_queue;
	int state;

	dest  = &sp->sk_v6_daddr;
	src   = &sp->sk_v6_rcv_saddr;
	destp = ntohs(inet->inet_dport);
	srcp  = ntohs(inet->inet_sport);

	icsk_pending = smp_load_acquire(&icsk->icsk_pending);
	if (icsk_pending == ICSK_TIME_RETRANS ||
	    icsk_pending == ICSK_TIME_REO_TIMEOUT ||
	    icsk_pending == ICSK_TIME_LOSS_PROBE) {
		timer_active	= 1;
		timer_expires	= icsk_timeout(icsk);
	} else if (icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= icsk_timeout(icsk);
	} else if (timer_pending(&sp->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sp->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires	= jiffies;
	}

	state = inet_sk_state_load(sp);
	if (state == TCP_LISTEN)
		rx_queue = READ_ONCE(sp->sk_ack_backlog);
	else
		/* Because we don't lock the socket,
		 * we might find a transient negative value.
		 */
		rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
				      READ_ONCE(tp->copied_seq), 0);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   state,
		   READ_ONCE(tp->write_seq) - tp->snd_una,
		   rx_queue,
		   timer_active,
		   jiffies_delta_to_clock_t(timer_expires - jiffies),
		   READ_ONCE(icsk->icsk_retransmits),
		   from_kuid_munged(seq_user_ns(seq), sk_uid(sp)),
		   READ_ONCE(icsk->icsk_probes_out),
		   sock_i_ino(sp),
		   refcount_read(&sp->sk_refcnt), sp,
		   jiffies_to_clock_t(icsk->icsk_rto),
		   jiffies_to_clock_t(icsk->icsk_ack.ato),
		   (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
		   tcp_snd_cwnd(tp),
		   state == TCP_LISTEN ?
			fastopenq->max_qlen :
			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
		   );
}

static void get_timewait6_sock(struct seq_file *seq,
			       struct inet_timewait_sock *tw, int i)
{
	long delta = tw->tw_timer.expires - jiffies;
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;

	dest = &tw->tw_v6_daddr;
	src  = &tw->tw_v6_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   READ_ONCE(tw->tw_substate), 0, 0,
		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
		   refcount_read(&tw->tw_refcnt), tw);
}

static int tcp6_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
	struct sock *sk = v;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 " sl "
			 "local_address "
			 "remote_address "
			 "st tx_queue rx_queue tr tm->when retrnsmt"
			 " uid timeout inode\n");
		goto out;
	}
	st = seq->private;

	if (sk->sk_state == TCP_TIME_WAIT)
		get_timewait6_sock(seq, v, st->num);
	else if (sk->sk_state == TCP_NEW_SYN_RECV)
		get_openreq6(seq, v, st->num);
	else
		get_tcp6_sock(seq, v, st->num);
out:
	return 0;
}

static const struct seq_operations tcp6_seq_ops = {
	.show		= tcp6_seq_show,
	.start		= tcp_seq_start,
	.next		= tcp_seq_next,
	.stop		= tcp_seq_stop,
};

static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.family		= AF_INET6,
};

int __net_init tcp6_proc_init(struct net *net)
{
	if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
				  sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
		return -ENOMEM;
	return 0;
}

void tcp6_proc_exit(struct net *net)
{
	remove_proc_entry("tcp6", net->proc_net);
}
#endif

struct proto tcpv6_prot = {
	.name			= "TCPv6",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.pre_connect		= tcp_v6_pre_connect,
	.connect		= tcp_v6_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v6_init_sock,
	.destroy		= tcp_v4_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.bpf_bypass_getsockopt	= tcp_bpf_bypass_getsockopt,
	.keepalive		= tcp_set_keepalive,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.splice_eof		= tcp_splice_eof,
	.backlog_rcv		= tcp_v6_do_rcv,
	.release_cb		= tcp_release_cb,
	.hash			= inet_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.put_port		= inet_put_port,
#ifdef CONFIG_BPF_SYSCALL
	.psock_update_sk_prot	= tcp_bpf_update_proto,
#endif
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.leave_memory_pressure	= tcp_leave_memory_pressure,
	.stream_memory_free	= tcp_stream_memory_free,
	.sockets_allocated	= &tcp_sockets_allocated,

	.memory_allocated	= &net_aligned_data.tcp_memory_allocated,
	.per_cpu_fw_alloc	= &tcp_memory_per_cpu_fw_alloc,

	.memory_pressure	= &tcp_memory_pressure,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp6_sock),
	.ipv6_pinfo_offset	= offsetof(struct tcp6_sock, inet6),
	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
	.twsk_prot		= &tcp6_timewait_sock_ops,
	.rsk_prot		= &tcp6_request_sock_ops,
	.h.hashinfo		= NULL,
	.no_autobind		= true,
	.diag_destroy		= tcp_abort,
};
EXPORT_SYMBOL_GPL(tcpv6_prot);

static struct inet_protosw tcpv6_protosw = {
	.type		= SOCK_STREAM,
	.protocol	= IPPROTO_TCP,
	.prot		= &tcpv6_prot,
	.ops		= &inet6_stream_ops,
	.flags		= INET_PROTOSW_PERMANENT |
			  INET_PROTOSW_ICSK,
};

static int __net_init tcpv6_net_init(struct net *net)
{
	int res;

	res = inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
				   SOCK_RAW, IPPROTO_TCP, net);
	if (!res)
		net->ipv6.tcp_sk->sk_clockid = CLOCK_MONOTONIC;

	return res;
}

static void __net_exit tcpv6_net_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
}

static struct pernet_operations tcpv6_net_ops = {
	.init	= tcpv6_net_init,
	.exit	= tcpv6_net_exit,
};

int __init tcpv6_init(void)
{
	int ret;

	net_hotdata.tcpv6_protocol = (struct inet6_protocol) {
		.handler     = tcp_v6_rcv,
		.err_handler = tcp_v6_err,
		.flags	     = INET6_PROTO_NOPOLICY | INET6_PROTO_FINAL,
	};
	ret = inet6_add_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP);
	if (ret)
		goto out;

	/* register inet6 protocol */
	ret = inet6_register_protosw(&tcpv6_protosw);
	if (ret)
		goto out_tcpv6_protocol;

	ret = register_pernet_subsys(&tcpv6_net_ops);
	if (ret)
		goto out_tcpv6_protosw;

	ret = mptcpv6_init();
	if (ret)
		goto out_tcpv6_pernet_subsys;

out:
	return ret;

out_tcpv6_pernet_subsys:
	unregister_pernet_subsys(&tcpv6_net_ops);
out_tcpv6_protosw:
	inet6_unregister_protosw(&tcpv6_protosw);
out_tcpv6_protocol:
	inet6_del_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP);
	goto out;
}

void tcpv6_exit(void)
{
	unregister_pernet_subsys(&tcpv6_net_ops);
	inet6_unregister_protosw(&tcpv6_protosw);
	inet6_del_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP);
}
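
/* Module teardown mirrors tcpv6_init() and its error unwinding, in reverse
 * order of registration: the pernet subsystem first, then the protosw
 * entry, and finally the inet6 protocol handler.
 */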