// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	TCP over IPv6
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on:
 *	linux/net/ipv4/tcp.c
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 */

#include <linux/bottom_half.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
#include <linux/indirect_call_wrapper.h>

#include <net/aligned_data.h>
#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/inet6_connection_sock.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/snmp.h>
#include <net/dsfield.h>
#include <net/timewait_sock.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>
#include <net/hotdata.h>
#include <net/busy_poll.h>
#include <net/rstreason.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <crypto/hash.h>
#include <linux/scatterlist.h>

#include <trace/events/tcp.h>

static void	tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb,
				  enum sk_rst_reason reason);
static void	tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				      struct request_sock *req);

INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);

static const struct inet_connection_sock_af_ops ipv6_mapped;
const struct inet_connection_sock_af_ops ipv6_specific;
#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
#endif

/* Helper returning the inet6 address from a given tcp socket.
 * It can be used in TCP stack instead of inet6_sk(sk).
 * This avoids a dereference and allows compiler optimizations.
 * It is a specialized version of inet6_sk_generic().
 */
#define tcp_inet6_sk(sk) (&container_of_const(tcp_sk(sk), \
					      struct tcp6_sock, tcp)->inet6)

static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	if (dst && dst_hold_safe(dst)) {
		rcu_assign_pointer(sk->sk_rx_dst, dst);
		sk->sk_rx_dst_ifindex = skb->skb_iif;
		sk->sk_rx_dst_cookie = rt6_get_cookie(dst_rt6_info(dst));
	}
}

static u32 tcp_v6_init_seq(const struct sk_buff *skb)
{
	return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
				ipv6_hdr(skb)->saddr.s6_addr32,
				tcp_hdr(skb)->dest,
				tcp_hdr(skb)->source);
}

static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
{
	return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
				   ipv6_hdr(skb)->saddr.s6_addr32);
}

static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
			      int addr_len)
{
	/* This check is replicated from tcp_v6_connect() and intended to
	 * prevent BPF program called below from accessing bytes that are out
	 * of the bound specified by user in addr_len.
	 */
	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	sock_owned_by_me(sk);

	return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, &addr_len);
}

static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
			  int addr_len)
{
	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct in6_addr *saddr = NULL, *final_p, final;
	struct inet_timewait_death_row *tcp_death_row;
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6_txoptions *opt;
	struct dst_entry *dst;
	struct flowi6 fl6;
	int addr_type;
	int err;

	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	if (usin->sin6_family != AF_INET6)
		return -EAFNOSUPPORT;

	memset(&fl6, 0, sizeof(fl6));

	if (inet6_test_bit(SNDFLOW, sk)) {
		fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
		IP6_ECN_flow_init(fl6.flowlabel);
		if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
			struct ip6_flowlabel *flowlabel;
			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
			if (IS_ERR(flowlabel))
				return -EINVAL;
			fl6_sock_release(flowlabel);
		}
	}

	/*
	 *	connect() to INADDR_ANY means loopback (BSD'ism).
	 */

	if (ipv6_addr_any(&usin->sin6_addr)) {
		if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
			ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
					       &usin->sin6_addr);
		else
			usin->sin6_addr = in6addr_loopback;
	}

	addr_type = ipv6_addr_type(&usin->sin6_addr);

	if (addr_type & IPV6_ADDR_MULTICAST)
		return -ENETUNREACH;

	if (addr_type&IPV6_ADDR_LINKLOCAL) {
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			/* If interface is set while binding, indices
			 * must coincide.
			 */
			if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
				return -EINVAL;

			sk->sk_bound_dev_if = usin->sin6_scope_id;
		}

		/* Connect to link-local address requires an interface */
		if (!sk->sk_bound_dev_if)
			return -EINVAL;
	}

	if (tp->rx_opt.ts_recent_stamp &&
	    !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		WRITE_ONCE(tp->write_seq, 0);
	}

	sk->sk_v6_daddr = usin->sin6_addr;
	np->flow_label = fl6.flowlabel;

	/*
	 *	TCP over IPv4
	 */

	if (addr_type & IPV6_ADDR_MAPPED) {
		u32 exthdrlen = icsk->icsk_ext_hdr_len;
		struct sockaddr_in sin;

		if (ipv6_only_sock(sk))
			return -ENETUNREACH;

		sin.sin_family = AF_INET;
		sin.sin_port = usin->sin6_port;
		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

		/* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
		WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped);
		if (sk_is_mptcp(sk))
			mptcpv6_handle_mapped(sk, true);
		sk->sk_backlog_rcv = tcp_v4_do_rcv;
#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

		if (err) {
			icsk->icsk_ext_hdr_len = exthdrlen;
			/* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
			WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific);
			if (sk_is_mptcp(sk))
				mptcpv6_handle_mapped(sk, false);
			sk->sk_backlog_rcv = tcp_v6_do_rcv;
#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
			tp->af_specific = &tcp_sock_ipv6_specific;
#endif
			goto failure;
		}
		np->saddr = sk->sk_v6_rcv_saddr;

		return err;
	}

	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
		saddr = &sk->sk_v6_rcv_saddr;

	fl6.flowi6_proto = IPPROTO_TCP;
	fl6.daddr = sk->sk_v6_daddr;
	fl6.saddr = saddr ? *saddr : np->saddr;
	fl6.flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
	fl6.flowi6_oif = sk->sk_bound_dev_if;
	fl6.flowi6_mark = sk->sk_mark;
	fl6.fl6_dport = usin->sin6_port;
	fl6.fl6_sport = inet->inet_sport;
	if (IS_ENABLED(CONFIG_IP_ROUTE_MULTIPATH) && !fl6.fl6_sport)
		fl6.flowi6_flags = FLOWI_FLAG_ANY_SPORT;
	fl6.flowi6_uid = sk_uid(sk);

	opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
	final_p = fl6_update_dst(&fl6, opt, &final);

	security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));

	dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p);
	if (IS_ERR(dst)) {
		err = PTR_ERR(dst);
		goto failure;
	}

	tp->tcp_usec_ts = dst_tcp_usec_ts(dst);
	tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;

	if (!saddr) {
		saddr = &fl6.saddr;

		err = inet_bhash2_update_saddr(sk, saddr, AF_INET6);
		if (err)
			goto failure;
	}

	/* set the source address */
	np->saddr = *saddr;
	inet->inet_rcv_saddr = LOOPBACK4_IPV6;

	sk->sk_gso_type = SKB_GSO_TCPV6;
	ip6_dst_store(sk, dst, NULL, NULL);

	icsk->icsk_ext_hdr_len = 0;
	if (opt)
		icsk->icsk_ext_hdr_len = opt->opt_flen +
					 opt->opt_nflen;

	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

	inet->inet_dport = usin->sin6_port;

	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet6_hash_connect(tcp_death_row, sk);
	if (err)
		goto late_failure;

	sk_set_txhash(sk);

	if (likely(!tp->repair)) {
		if (!tp->write_seq)
			WRITE_ONCE(tp->write_seq,
				   secure_tcpv6_seq(np->saddr.s6_addr32,
						    sk->sk_v6_daddr.s6_addr32,
						    inet->inet_sport,
						    inet->inet_dport));
		tp->tsoffset = secure_tcpv6_ts_off(net, np->saddr.s6_addr32,
						   sk->sk_v6_daddr.s6_addr32);
	}

	if (tcp_fastopen_defer_connect(sk, &err))
		return err;
	if (err)
		goto late_failure;

	err = tcp_connect(sk);
	if (err)
		goto late_failure;

	return 0;

late_failure:
	tcp_set_state(sk, TCP_CLOSE);
	inet_bhash2_reset_saddr(sk);
failure:
	inet->inet_dport = 0;
	sk->sk_route_caps = 0;
	return err;
}
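
/* Editor's note (illustrative userspace sketch, not part of the kernel build):
 * the IPV6_ADDR_MAPPED branch in tcp_v6_connect() above is what lets a
 * dual-stack AF_INET6 socket reach an IPv4 peer; the icsk_af_ops and
 * sk_backlog_rcv hooks are switched to their IPv4 variants and the connect
 * is handed to tcp_v4_connect().  From userspace this looks roughly like:
 *
 *	int fd = socket(AF_INET6, SOCK_STREAM, 0);
 *	struct sockaddr_in6 sa = { .sin6_family = AF_INET6,
 *				   .sin6_port = htons(80) };
 *	inet_pton(AF_INET6, "::ffff:192.0.2.1", &sa.sin6_addr);
 *	connect(fd, (struct sockaddr *)&sa, sizeof(sa));
 *
 * If the IPV6_V6ONLY socket option is set, ipv6_only_sock() is true and the
 * same connect() fails with ENETUNREACH before the IPv4 fallback is taken.
 * The address 192.0.2.1 and port 80 are placeholders for illustration only.
 */
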
static void tcp_v6_mtu_reduced(struct sock *sk)
{
	struct dst_entry *dst;
	u32 mtu;

	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
		return;

	mtu = READ_ONCE(tcp_sk(sk)->mtu_info);

	/* Drop requests trying to increase our current mss.
	 * Check done in __ip6_rt_update_pmtu() is too late.
	 */
	if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
		return;

	dst = inet6_csk_update_pmtu(sk, mtu);
	if (!dst)
		return;

	if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
		tcp_sync_mss(sk, dst_mtu(dst));
		tcp_simple_retransmit(sk);
	}
}

static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		      u8 type, u8 code, int offset, __be32 info)
{
	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
	struct net *net = dev_net_rcu(skb->dev);
	struct request_sock *fastopen;
	struct ipv6_pinfo *np;
	struct tcp_sock *tp;
	__u32 seq, snd_una;
	struct sock *sk;
	bool fatal;
	int err;

	sk = __inet6_lookup_established(net, &hdr->daddr, th->dest,
					&hdr->saddr, ntohs(th->source),
					skb->dev->ifindex, inet6_sdif(skb));

	if (!sk) {
		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
				  ICMP6_MIB_INERRORS);
		return -ENOENT;
	}

	if (sk->sk_state == TCP_TIME_WAIT) {
		/* To increase the counter of ignored icmps for TCP-AO */
		tcp_ao_ignore_icmp(sk, AF_INET6, type, code);
		inet_twsk_put(inet_twsk(sk));
		return 0;
	}
	seq = ntohl(th->seq);
	fatal = icmpv6_err_convert(type, code, &err);
	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		tcp_req_err(sk, seq, fatal);
		return 0;
	}

	if (tcp_ao_ignore_icmp(sk, AF_INET6, type, code)) {
		sock_put(sk);
		return 0;
	}

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (static_branch_unlikely(&ip6_min_hopcount)) {
		/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
		if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
			goto out;
		}
	}

	tp = tcp_sk(sk);
	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
	fastopen = rcu_dereference(tp->fastopen_rsk);
	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, snd_una, tp->snd_nxt)) {
		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = tcp_inet6_sk(sk);

	if (type == NDISC_REDIRECT) {
		if (!sock_owned_by_user(sk)) {
			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);

			if (dst)
				dst->ops->redirect(dst, sk, skb);
		}
		goto out;
	}

	if (type == ICMPV6_PKT_TOOBIG) {
		u32 mtu = ntohl(info);

		/* We are not interested in TCP_LISTEN and open_requests
		 * (SYN-ACKs sent out by Linux are always <576 bytes so
		 * they should go through unfragmented).
		 */
		if (sk->sk_state == TCP_LISTEN)
			goto out;

		if (!ip6_sk_accept_pmtu(sk))
			goto out;

		if (mtu < IPV6_MIN_MTU)
			goto out;

		WRITE_ONCE(tp->mtu_info, mtu);

		if (!sock_owned_by_user(sk))
			tcp_v6_mtu_reduced(sk);
		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
					   &sk->sk_tsq_flags))
			sock_hold(sk);
		goto out;
	}

	/* Might be for a request_sock */
	switch (sk->sk_state) {
	case TCP_SYN_SENT:
	case TCP_SYN_RECV:
		/* Only in fast or simultaneous open. If a fast open socket is
		 * already accepted it is treated as a connected one below.
		 */
		if (fastopen && !fastopen->sk)
			break;

		ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);

		if (!sock_owned_by_user(sk))
			tcp_done_with_error(sk, err);
		else
			WRITE_ONCE(sk->sk_err_soft, err);
		goto out;
	case TCP_LISTEN:
		break;
	default:
		/* check if this ICMP message allows revert of backoff.
		 * (see RFC 6069)
		 */
		if (!fastopen && type == ICMPV6_DEST_UNREACH &&
		    code == ICMPV6_NOROUTE)
			tcp_ld_RTO_revert(sk, seq);
	}

	if (!sock_owned_by_user(sk) && inet6_test_bit(RECVERR6, sk)) {
		WRITE_ONCE(sk->sk_err, err);
		sk_error_report(sk);
	} else {
		WRITE_ONCE(sk->sk_err_soft, err);
	}
out:
	bh_unlock_sock(sk);
	sock_put(sk);
	return 0;
}
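
/* Editor's note (illustrative userspace sketch, not part of the kernel build):
 * the RECVERR6 path above corresponds to the IPV6_RECVERR socket option.
 * When it is enabled, the ICMPv6-derived errors queued by ipv6_icmp_error()
 * and signalled through sk_error_report() can be read from the socket error
 * queue, roughly:
 *
 *	int on = 1;
 *	setsockopt(fd, SOL_IPV6, IPV6_RECVERR, &on, sizeof(on));
 *	...
 *	char cbuf[512];
 *	struct msghdr msg = { .msg_control = cbuf,
 *			      .msg_controllen = sizeof(cbuf) };
 *	recvmsg(fd, &msg, MSG_ERRQUEUE);	// cmsgs carry sock_extended_err
 *
 * This only sketches the delivery path; real code must size buffers and
 * check return values.
 */
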
static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
			      struct flowi *fl,
			      struct request_sock *req,
			      struct tcp_fastopen_cookie *foc,
			      enum tcp_synack_type synack_type,
			      struct sk_buff *syn_skb)
{
	struct inet_request_sock *ireq = inet_rsk(req);
	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct ipv6_txoptions *opt;
	struct flowi6 *fl6 = &fl->u.ip6;
	struct sk_buff *skb;
	int err = -ENOMEM;
	u8 tclass;

	/* First, grab a route. */
	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
					       IPPROTO_TCP)) == NULL)
		goto done;

	skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);

	if (skb) {
		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
				    &ireq->ir_v6_rmt_addr);

		fl6->daddr = ireq->ir_v6_rmt_addr;
		if (inet6_test_bit(REPFLOW, sk) && ireq->pktopts)
			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));

		tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
				(tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
				(np->tclass & INET_ECN_MASK) :
				np->tclass;

		if (!INET_ECN_is_capable(tclass) &&
		    tcp_bpf_ca_needs_ecn((struct sock *)req))
			tclass |= INET_ECN_ECT_0;

		rcu_read_lock();
		opt = ireq->ipv6_opt;
		if (!opt)
			opt = rcu_dereference(np->opt);
		err = ip6_xmit(sk, skb, fl6, skb->mark ?
: READ_ONCE(sk->sk_mark), 568 opt, tclass, READ_ONCE(sk->sk_priority)); 569 rcu_read_unlock(); 570 err = net_xmit_eval(err); 571 } 572 573 done: 574 return err; 575 } 576 577 578 static void tcp_v6_reqsk_destructor(struct request_sock *req) 579 { 580 kfree(inet_rsk(req)->ipv6_opt); 581 consume_skb(inet_rsk(req)->pktopts); 582 } 583 584 #ifdef CONFIG_TCP_MD5SIG 585 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk, 586 const struct in6_addr *addr, 587 int l3index) 588 { 589 return tcp_md5_do_lookup(sk, l3index, 590 (union tcp_md5_addr *)addr, AF_INET6); 591 } 592 593 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk, 594 const struct sock *addr_sk) 595 { 596 int l3index; 597 598 l3index = l3mdev_master_ifindex_by_index(sock_net(sk), 599 addr_sk->sk_bound_dev_if); 600 return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr, 601 l3index); 602 } 603 604 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname, 605 sockptr_t optval, int optlen) 606 { 607 struct tcp_md5sig cmd; 608 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr; 609 union tcp_ao_addr *addr; 610 int l3index = 0; 611 u8 prefixlen; 612 bool l3flag; 613 u8 flags; 614 615 if (optlen < sizeof(cmd)) 616 return -EINVAL; 617 618 if (copy_from_sockptr(&cmd, optval, sizeof(cmd))) 619 return -EFAULT; 620 621 if (sin6->sin6_family != AF_INET6) 622 return -EINVAL; 623 624 flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX; 625 l3flag = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX; 626 627 if (optname == TCP_MD5SIG_EXT && 628 cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) { 629 prefixlen = cmd.tcpm_prefixlen; 630 if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) && 631 prefixlen > 32)) 632 return -EINVAL; 633 } else { 634 prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128; 635 } 636 637 if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex && 638 cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) { 639 struct net_device *dev; 640 641 rcu_read_lock(); 642 dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex); 643 if (dev && netif_is_l3_master(dev)) 644 l3index = dev->ifindex; 645 rcu_read_unlock(); 646 647 /* ok to reference set/not set outside of rcu; 648 * right now device MUST be an L3 master 649 */ 650 if (!dev || !l3index) 651 return -EINVAL; 652 } 653 654 if (!cmd.tcpm_keylen) { 655 if (ipv6_addr_v4mapped(&sin6->sin6_addr)) 656 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3], 657 AF_INET, prefixlen, 658 l3index, flags); 659 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr, 660 AF_INET6, prefixlen, l3index, flags); 661 } 662 663 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN) 664 return -EINVAL; 665 666 if (ipv6_addr_v4mapped(&sin6->sin6_addr)) { 667 addr = (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3]; 668 669 /* Don't allow keys for peers that have a matching TCP-AO key. 670 * See the comment in tcp_ao_add_cmd() 671 */ 672 if (tcp_ao_required(sk, addr, AF_INET, 673 l3flag ? l3index : -1, false)) 674 return -EKEYREJECTED; 675 return tcp_md5_do_add(sk, addr, 676 AF_INET, prefixlen, l3index, flags, 677 cmd.tcpm_key, cmd.tcpm_keylen); 678 } 679 680 addr = (union tcp_md5_addr *)&sin6->sin6_addr; 681 682 /* Don't allow keys for peers that have a matching TCP-AO key. 683 * See the comment in tcp_ao_add_cmd() 684 */ 685 if (tcp_ao_required(sk, addr, AF_INET6, l3flag ? 
l3index : -1, false)) 686 return -EKEYREJECTED; 687 688 return tcp_md5_do_add(sk, addr, AF_INET6, prefixlen, l3index, flags, 689 cmd.tcpm_key, cmd.tcpm_keylen); 690 } 691 692 static int tcp_v6_md5_hash_headers(struct tcp_sigpool *hp, 693 const struct in6_addr *daddr, 694 const struct in6_addr *saddr, 695 const struct tcphdr *th, int nbytes) 696 { 697 struct tcp6_pseudohdr *bp; 698 struct scatterlist sg; 699 struct tcphdr *_th; 700 701 bp = hp->scratch; 702 /* 1. TCP pseudo-header (RFC2460) */ 703 bp->saddr = *saddr; 704 bp->daddr = *daddr; 705 bp->protocol = cpu_to_be32(IPPROTO_TCP); 706 bp->len = cpu_to_be32(nbytes); 707 708 _th = (struct tcphdr *)(bp + 1); 709 memcpy(_th, th, sizeof(*th)); 710 _th->check = 0; 711 712 sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th)); 713 ahash_request_set_crypt(hp->req, &sg, NULL, 714 sizeof(*bp) + sizeof(*th)); 715 return crypto_ahash_update(hp->req); 716 } 717 718 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, 719 const struct in6_addr *daddr, struct in6_addr *saddr, 720 const struct tcphdr *th) 721 { 722 struct tcp_sigpool hp; 723 724 if (tcp_sigpool_start(tcp_md5_sigpool_id, &hp)) 725 goto clear_hash_nostart; 726 727 if (crypto_ahash_init(hp.req)) 728 goto clear_hash; 729 if (tcp_v6_md5_hash_headers(&hp, daddr, saddr, th, th->doff << 2)) 730 goto clear_hash; 731 if (tcp_md5_hash_key(&hp, key)) 732 goto clear_hash; 733 ahash_request_set_crypt(hp.req, NULL, md5_hash, 0); 734 if (crypto_ahash_final(hp.req)) 735 goto clear_hash; 736 737 tcp_sigpool_end(&hp); 738 return 0; 739 740 clear_hash: 741 tcp_sigpool_end(&hp); 742 clear_hash_nostart: 743 memset(md5_hash, 0, 16); 744 return 1; 745 } 746 747 static int tcp_v6_md5_hash_skb(char *md5_hash, 748 const struct tcp_md5sig_key *key, 749 const struct sock *sk, 750 const struct sk_buff *skb) 751 { 752 const struct tcphdr *th = tcp_hdr(skb); 753 const struct in6_addr *saddr, *daddr; 754 struct tcp_sigpool hp; 755 756 if (sk) { /* valid for establish/request sockets */ 757 saddr = &sk->sk_v6_rcv_saddr; 758 daddr = &sk->sk_v6_daddr; 759 } else { 760 const struct ipv6hdr *ip6h = ipv6_hdr(skb); 761 saddr = &ip6h->saddr; 762 daddr = &ip6h->daddr; 763 } 764 765 if (tcp_sigpool_start(tcp_md5_sigpool_id, &hp)) 766 goto clear_hash_nostart; 767 768 if (crypto_ahash_init(hp.req)) 769 goto clear_hash; 770 771 if (tcp_v6_md5_hash_headers(&hp, daddr, saddr, th, skb->len)) 772 goto clear_hash; 773 if (tcp_sigpool_hash_skb_data(&hp, skb, th->doff << 2)) 774 goto clear_hash; 775 if (tcp_md5_hash_key(&hp, key)) 776 goto clear_hash; 777 ahash_request_set_crypt(hp.req, NULL, md5_hash, 0); 778 if (crypto_ahash_final(hp.req)) 779 goto clear_hash; 780 781 tcp_sigpool_end(&hp); 782 return 0; 783 784 clear_hash: 785 tcp_sigpool_end(&hp); 786 clear_hash_nostart: 787 memset(md5_hash, 0, 16); 788 return 1; 789 } 790 #endif 791 792 static void tcp_v6_init_req(struct request_sock *req, 793 const struct sock *sk_listener, 794 struct sk_buff *skb, 795 u32 tw_isn) 796 { 797 bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags); 798 struct inet_request_sock *ireq = inet_rsk(req); 799 const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener); 800 801 ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; 802 ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; 803 ireq->ir_rmt_addr = LOOPBACK4_IPV6; 804 ireq->ir_loc_addr = LOOPBACK4_IPV6; 805 806 /* So that link locals have meaning */ 807 if ((!sk_listener->sk_bound_dev_if || l3_slave) && 808 ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) 809 ireq->ir_iif = 
tcp_v6_iif(skb); 810 811 if (!tw_isn && 812 (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) || 813 np->rxopt.bits.rxinfo || 814 np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || 815 np->rxopt.bits.rxohlim || inet6_test_bit(REPFLOW, sk_listener))) { 816 refcount_inc(&skb->users); 817 ireq->pktopts = skb; 818 } 819 } 820 821 static struct dst_entry *tcp_v6_route_req(const struct sock *sk, 822 struct sk_buff *skb, 823 struct flowi *fl, 824 struct request_sock *req, 825 u32 tw_isn) 826 { 827 tcp_v6_init_req(req, sk, skb, tw_isn); 828 829 if (security_inet_conn_request(sk, skb, req)) 830 return NULL; 831 832 return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP); 833 } 834 835 struct request_sock_ops tcp6_request_sock_ops __read_mostly = { 836 .family = AF_INET6, 837 .obj_size = sizeof(struct tcp6_request_sock), 838 .send_ack = tcp_v6_reqsk_send_ack, 839 .destructor = tcp_v6_reqsk_destructor, 840 .send_reset = tcp_v6_send_reset, 841 .syn_ack_timeout = tcp_syn_ack_timeout, 842 }; 843 844 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { 845 .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - 846 sizeof(struct ipv6hdr), 847 #ifdef CONFIG_TCP_MD5SIG 848 .req_md5_lookup = tcp_v6_md5_lookup, 849 .calc_md5_hash = tcp_v6_md5_hash_skb, 850 #endif 851 #ifdef CONFIG_TCP_AO 852 .ao_lookup = tcp_v6_ao_lookup_rsk, 853 .ao_calc_key = tcp_v6_ao_calc_key_rsk, 854 .ao_synack_hash = tcp_v6_ao_synack_hash, 855 #endif 856 #ifdef CONFIG_SYN_COOKIES 857 .cookie_init_seq = cookie_v6_init_sequence, 858 #endif 859 .route_req = tcp_v6_route_req, 860 .init_seq = tcp_v6_init_seq, 861 .init_ts_off = tcp_v6_init_ts_off, 862 .send_synack = tcp_v6_send_synack, 863 }; 864 865 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq, 866 u32 ack, u32 win, u32 tsval, u32 tsecr, 867 int oif, int rst, u8 tclass, __be32 label, 868 u32 priority, u32 txhash, struct tcp_key *key) 869 { 870 struct net *net = sk ? sock_net(sk) : skb_dst_dev_net_rcu(skb); 871 unsigned int tot_len = sizeof(struct tcphdr); 872 struct sock *ctl_sk = net->ipv6.tcp_sk; 873 const struct tcphdr *th = tcp_hdr(skb); 874 __be32 mrst = 0, *topt; 875 struct dst_entry *dst; 876 struct sk_buff *buff; 877 struct tcphdr *t1; 878 struct flowi6 fl6; 879 u32 mark = 0; 880 881 if (tsecr) 882 tot_len += TCPOLEN_TSTAMP_ALIGNED; 883 if (tcp_key_is_md5(key)) 884 tot_len += TCPOLEN_MD5SIG_ALIGNED; 885 if (tcp_key_is_ao(key)) 886 tot_len += tcp_ao_len_aligned(key->ao_key); 887 888 #ifdef CONFIG_MPTCP 889 if (rst && !tcp_key_is_md5(key)) { 890 mrst = mptcp_reset_option(skb); 891 892 if (mrst) 893 tot_len += sizeof(__be32); 894 } 895 #endif 896 897 buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC); 898 if (!buff) 899 return; 900 901 skb_reserve(buff, MAX_TCP_HEADER); 902 903 t1 = skb_push(buff, tot_len); 904 skb_reset_transport_header(buff); 905 906 /* Swap the send and the receive. 
*/ 907 memset(t1, 0, sizeof(*t1)); 908 t1->dest = th->source; 909 t1->source = th->dest; 910 t1->doff = tot_len / 4; 911 t1->seq = htonl(seq); 912 t1->ack_seq = htonl(ack); 913 t1->ack = !rst || !th->ack; 914 t1->rst = rst; 915 t1->window = htons(win); 916 917 topt = (__be32 *)(t1 + 1); 918 919 if (tsecr) { 920 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | 921 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); 922 *topt++ = htonl(tsval); 923 *topt++ = htonl(tsecr); 924 } 925 926 if (mrst) 927 *topt++ = mrst; 928 929 #ifdef CONFIG_TCP_MD5SIG 930 if (tcp_key_is_md5(key)) { 931 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | 932 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG); 933 tcp_v6_md5_hash_hdr((__u8 *)topt, key->md5_key, 934 &ipv6_hdr(skb)->saddr, 935 &ipv6_hdr(skb)->daddr, t1); 936 } 937 #endif 938 #ifdef CONFIG_TCP_AO 939 if (tcp_key_is_ao(key)) { 940 *topt++ = htonl((TCPOPT_AO << 24) | 941 (tcp_ao_len(key->ao_key) << 16) | 942 (key->ao_key->sndid << 8) | 943 (key->rcv_next)); 944 945 tcp_ao_hash_hdr(AF_INET6, (char *)topt, key->ao_key, 946 key->traffic_key, 947 (union tcp_ao_addr *)&ipv6_hdr(skb)->saddr, 948 (union tcp_ao_addr *)&ipv6_hdr(skb)->daddr, 949 t1, key->sne); 950 } 951 #endif 952 953 memset(&fl6, 0, sizeof(fl6)); 954 fl6.daddr = ipv6_hdr(skb)->saddr; 955 fl6.saddr = ipv6_hdr(skb)->daddr; 956 fl6.flowlabel = label; 957 958 buff->ip_summed = CHECKSUM_PARTIAL; 959 960 __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr); 961 962 fl6.flowi6_proto = IPPROTO_TCP; 963 if (rt6_need_strict(&fl6.daddr) && !oif) 964 fl6.flowi6_oif = tcp_v6_iif(skb); 965 else { 966 if (!oif && netif_index_is_l3_master(net, skb->skb_iif)) 967 oif = skb->skb_iif; 968 969 fl6.flowi6_oif = oif; 970 } 971 972 if (sk) { 973 /* unconstify the socket only to attach it to buff with care. */ 974 skb_set_owner_edemux(buff, (struct sock *)sk); 975 976 if (sk->sk_state == TCP_TIME_WAIT) 977 mark = inet_twsk(sk)->tw_mark; 978 else 979 mark = READ_ONCE(sk->sk_mark); 980 skb_set_delivery_time(buff, tcp_transmit_time(sk), SKB_CLOCK_MONOTONIC); 981 } 982 if (txhash) { 983 /* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */ 984 skb_set_hash(buff, txhash, PKT_HASH_TYPE_L4); 985 } 986 fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark; 987 fl6.fl6_dport = t1->dest; 988 fl6.fl6_sport = t1->source; 989 fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? 
sk : NULL); 990 security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6)); 991 992 /* Pass a socket to ip6_dst_lookup either it is for RST 993 * Underlying function will use this to retrieve the network 994 * namespace 995 */ 996 if (sk && sk->sk_state != TCP_TIME_WAIT) 997 dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /*sk's xfrm_policy can be referred*/ 998 else 999 dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL); 1000 if (!IS_ERR(dst)) { 1001 skb_dst_set(buff, dst); 1002 ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, 1003 tclass, priority); 1004 TCP_INC_STATS(net, TCP_MIB_OUTSEGS); 1005 if (rst) 1006 TCP_INC_STATS(net, TCP_MIB_OUTRSTS); 1007 return; 1008 } 1009 1010 kfree_skb(buff); 1011 } 1012 1013 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb, 1014 enum sk_rst_reason reason) 1015 { 1016 const struct tcphdr *th = tcp_hdr(skb); 1017 struct ipv6hdr *ipv6h = ipv6_hdr(skb); 1018 const __u8 *md5_hash_location = NULL; 1019 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) 1020 bool allocated_traffic_key = false; 1021 #endif 1022 const struct tcp_ao_hdr *aoh; 1023 struct tcp_key key = {}; 1024 u32 seq = 0, ack_seq = 0; 1025 __be32 label = 0; 1026 u32 priority = 0; 1027 struct net *net; 1028 u32 txhash = 0; 1029 int oif = 0; 1030 #ifdef CONFIG_TCP_MD5SIG 1031 unsigned char newhash[16]; 1032 int genhash; 1033 struct sock *sk1 = NULL; 1034 #endif 1035 1036 if (th->rst) 1037 return; 1038 1039 /* If sk not NULL, it means we did a successful lookup and incoming 1040 * route had to be correct. prequeue might have dropped our dst. 1041 */ 1042 if (!sk && !ipv6_unicast_destination(skb)) 1043 return; 1044 1045 net = sk ? sock_net(sk) : skb_dst_dev_net_rcu(skb); 1046 /* Invalid TCP option size or twice included auth */ 1047 if (tcp_parse_auth_options(th, &md5_hash_location, &aoh)) 1048 return; 1049 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) 1050 rcu_read_lock(); 1051 #endif 1052 #ifdef CONFIG_TCP_MD5SIG 1053 if (sk && sk_fullsock(sk)) { 1054 int l3index; 1055 1056 /* sdif set, means packet ingressed via a device 1057 * in an L3 domain and inet_iif is set to it. 1058 */ 1059 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; 1060 key.md5_key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index); 1061 if (key.md5_key) 1062 key.type = TCP_KEY_MD5; 1063 } else if (md5_hash_location) { 1064 int dif = tcp_v6_iif_l3_slave(skb); 1065 int sdif = tcp_v6_sdif(skb); 1066 int l3index; 1067 1068 /* 1069 * active side is lost. Try to find listening socket through 1070 * source port, and then find md5 key through listening socket. 1071 * we are not loose security here: 1072 * Incoming packet is checked with md5 hash with finding key, 1073 * no RST generated if md5 hash doesn't match. 1074 */ 1075 sk1 = inet6_lookup_listener(net, NULL, 0, &ipv6h->saddr, th->source, 1076 &ipv6h->daddr, ntohs(th->source), 1077 dif, sdif); 1078 if (!sk1) 1079 goto out; 1080 1081 /* sdif set, means packet ingressed via a device 1082 * in an L3 domain and dif is set to it. 1083 */ 1084 l3index = tcp_v6_sdif(skb) ? 
dif : 0; 1085 1086 key.md5_key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index); 1087 if (!key.md5_key) 1088 goto out; 1089 key.type = TCP_KEY_MD5; 1090 1091 genhash = tcp_v6_md5_hash_skb(newhash, key.md5_key, NULL, skb); 1092 if (genhash || memcmp(md5_hash_location, newhash, 16) != 0) 1093 goto out; 1094 } 1095 #endif 1096 1097 if (th->ack) 1098 seq = ntohl(th->ack_seq); 1099 else 1100 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len - 1101 (th->doff << 2); 1102 1103 #ifdef CONFIG_TCP_AO 1104 if (aoh) { 1105 int l3index; 1106 1107 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; 1108 if (tcp_ao_prepare_reset(sk, skb, aoh, l3index, seq, 1109 &key.ao_key, &key.traffic_key, 1110 &allocated_traffic_key, 1111 &key.rcv_next, &key.sne)) 1112 goto out; 1113 key.type = TCP_KEY_AO; 1114 } 1115 #endif 1116 1117 if (sk) { 1118 oif = sk->sk_bound_dev_if; 1119 if (sk_fullsock(sk)) { 1120 if (inet6_test_bit(REPFLOW, sk)) 1121 label = ip6_flowlabel(ipv6h); 1122 priority = READ_ONCE(sk->sk_priority); 1123 txhash = sk->sk_txhash; 1124 } 1125 if (sk->sk_state == TCP_TIME_WAIT) { 1126 label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel); 1127 priority = inet_twsk(sk)->tw_priority; 1128 txhash = inet_twsk(sk)->tw_txhash; 1129 } 1130 } else { 1131 if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET) 1132 label = ip6_flowlabel(ipv6h); 1133 } 1134 1135 trace_tcp_send_reset(sk, skb, reason); 1136 1137 tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, 1, 1138 ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK, 1139 label, priority, txhash, 1140 &key); 1141 1142 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) 1143 out: 1144 if (allocated_traffic_key) 1145 kfree(key.traffic_key); 1146 rcu_read_unlock(); 1147 #endif 1148 } 1149 1150 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq, 1151 u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, 1152 struct tcp_key *key, u8 tclass, 1153 __be32 label, u32 priority, u32 txhash) 1154 { 1155 tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, 0, 1156 tclass, label, priority, txhash, key); 1157 } 1158 1159 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb, 1160 enum tcp_tw_status tw_status) 1161 { 1162 struct inet_timewait_sock *tw = inet_twsk(sk); 1163 struct tcp_timewait_sock *tcptw = tcp_twsk(sk); 1164 u8 tclass = tw->tw_tclass; 1165 struct tcp_key key = {}; 1166 1167 if (tw_status == TCP_TW_ACK_OOW) 1168 tclass &= ~INET_ECN_MASK; 1169 #ifdef CONFIG_TCP_AO 1170 struct tcp_ao_info *ao_info; 1171 1172 if (static_branch_unlikely(&tcp_ao_needed.key)) { 1173 1174 /* FIXME: the segment to-be-acked is not verified yet */ 1175 ao_info = rcu_dereference(tcptw->ao_info); 1176 if (ao_info) { 1177 const struct tcp_ao_hdr *aoh; 1178 1179 /* Invalid TCP option size or twice included auth */ 1180 if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh)) 1181 goto out; 1182 if (aoh) 1183 key.ao_key = tcp_ao_established_key(sk, ao_info, 1184 aoh->rnext_keyid, -1); 1185 } 1186 } 1187 if (key.ao_key) { 1188 struct tcp_ao_key *rnext_key; 1189 1190 key.traffic_key = snd_other_key(key.ao_key); 1191 /* rcv_next switches to our rcv_next */ 1192 rnext_key = READ_ONCE(ao_info->rnext_key); 1193 key.rcv_next = rnext_key->rcvid; 1194 key.sne = READ_ONCE(ao_info->snd_sne); 1195 key.type = TCP_KEY_AO; 1196 #else 1197 if (0) { 1198 #endif 1199 #ifdef CONFIG_TCP_MD5SIG 1200 } else if (static_branch_unlikely(&tcp_md5_needed.key)) { 1201 key.md5_key = tcp_twsk_md5_key(tcptw); 1202 if (key.md5_key) 1203 key.type = 
TCP_KEY_MD5; 1204 #endif 1205 } 1206 1207 tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, 1208 READ_ONCE(tcptw->tw_rcv_nxt), 1209 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, 1210 tcp_tw_tsval(tcptw), 1211 READ_ONCE(tcptw->tw_ts_recent), tw->tw_bound_dev_if, 1212 &key, tclass, cpu_to_be32(tw->tw_flowlabel), 1213 tw->tw_priority, tw->tw_txhash); 1214 1215 #ifdef CONFIG_TCP_AO 1216 out: 1217 #endif 1218 inet_twsk_put(tw); 1219 } 1220 1221 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, 1222 struct request_sock *req) 1223 { 1224 struct tcp_key key = {}; 1225 1226 #ifdef CONFIG_TCP_AO 1227 if (static_branch_unlikely(&tcp_ao_needed.key) && 1228 tcp_rsk_used_ao(req)) { 1229 const struct in6_addr *addr = &ipv6_hdr(skb)->saddr; 1230 const struct tcp_ao_hdr *aoh; 1231 int l3index; 1232 1233 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; 1234 /* Invalid TCP option size or twice included auth */ 1235 if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh)) 1236 return; 1237 if (!aoh) 1238 return; 1239 key.ao_key = tcp_ao_do_lookup(sk, l3index, 1240 (union tcp_ao_addr *)addr, 1241 AF_INET6, aoh->rnext_keyid, -1); 1242 if (unlikely(!key.ao_key)) { 1243 /* Send ACK with any matching MKT for the peer */ 1244 key.ao_key = tcp_ao_do_lookup(sk, l3index, 1245 (union tcp_ao_addr *)addr, 1246 AF_INET6, -1, -1); 1247 /* Matching key disappeared (user removed the key?) 1248 * let the handshake timeout. 1249 */ 1250 if (!key.ao_key) { 1251 net_info_ratelimited("TCP-AO key for (%pI6, %d)->(%pI6, %d) suddenly disappeared, won't ACK new connection\n", 1252 addr, 1253 ntohs(tcp_hdr(skb)->source), 1254 &ipv6_hdr(skb)->daddr, 1255 ntohs(tcp_hdr(skb)->dest)); 1256 return; 1257 } 1258 } 1259 key.traffic_key = kmalloc(tcp_ao_digest_size(key.ao_key), GFP_ATOMIC); 1260 if (!key.traffic_key) 1261 return; 1262 1263 key.type = TCP_KEY_AO; 1264 key.rcv_next = aoh->keyid; 1265 tcp_v6_ao_calc_key_rsk(key.ao_key, key.traffic_key, req); 1266 #else 1267 if (0) { 1268 #endif 1269 #ifdef CONFIG_TCP_MD5SIG 1270 } else if (static_branch_unlikely(&tcp_md5_needed.key)) { 1271 int l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; 1272 1273 key.md5_key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, 1274 l3index); 1275 if (key.md5_key) 1276 key.type = TCP_KEY_MD5; 1277 #endif 1278 } 1279 1280 /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV 1281 * sk->sk_state == TCP_SYN_RECV -> for Fast Open. 1282 */ 1283 tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ? 
1284 tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, 1285 tcp_rsk(req)->rcv_nxt, 1286 tcp_synack_window(req) >> inet_rsk(req)->rcv_wscale, 1287 tcp_rsk_tsval(tcp_rsk(req)), 1288 req->ts_recent, sk->sk_bound_dev_if, 1289 &key, ipv6_get_dsfield(ipv6_hdr(skb)) & ~INET_ECN_MASK, 1290 0, 1291 READ_ONCE(sk->sk_priority), 1292 READ_ONCE(tcp_rsk(req)->txhash)); 1293 if (tcp_key_is_ao(&key)) 1294 kfree(key.traffic_key); 1295 } 1296 1297 1298 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb) 1299 { 1300 #ifdef CONFIG_SYN_COOKIES 1301 const struct tcphdr *th = tcp_hdr(skb); 1302 1303 if (!th->syn) 1304 sk = cookie_v6_check(sk, skb); 1305 #endif 1306 return sk; 1307 } 1308 1309 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph, 1310 struct tcphdr *th, u32 *cookie) 1311 { 1312 u16 mss = 0; 1313 #ifdef CONFIG_SYN_COOKIES 1314 mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops, 1315 &tcp_request_sock_ipv6_ops, sk, th); 1316 if (mss) { 1317 *cookie = __cookie_v6_init_sequence(iph, th, &mss); 1318 tcp_synq_overflow(sk); 1319 } 1320 #endif 1321 return mss; 1322 } 1323 1324 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) 1325 { 1326 if (skb->protocol == htons(ETH_P_IP)) 1327 return tcp_v4_conn_request(sk, skb); 1328 1329 if (!ipv6_unicast_destination(skb)) 1330 goto drop; 1331 1332 if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) { 1333 __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS); 1334 return 0; 1335 } 1336 1337 return tcp_conn_request(&tcp6_request_sock_ops, 1338 &tcp_request_sock_ipv6_ops, sk, skb); 1339 1340 drop: 1341 tcp_listendrop(sk); 1342 return 0; /* don't send reset */ 1343 } 1344 1345 static void tcp_v6_restore_cb(struct sk_buff *skb) 1346 { 1347 /* We need to move header back to the beginning if xfrm6_policy_check() 1348 * and tcp_v6_fill_cb() are going to be called again. 1349 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there. 
1350 */ 1351 memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6, 1352 sizeof(struct inet6_skb_parm)); 1353 } 1354 1355 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, 1356 struct request_sock *req, 1357 struct dst_entry *dst, 1358 struct request_sock *req_unhash, 1359 bool *own_req) 1360 { 1361 struct inet_request_sock *ireq; 1362 struct ipv6_pinfo *newnp; 1363 const struct ipv6_pinfo *np = tcp_inet6_sk(sk); 1364 struct ipv6_txoptions *opt; 1365 struct inet_sock *newinet; 1366 bool found_dup_sk = false; 1367 struct tcp_sock *newtp; 1368 struct sock *newsk; 1369 #ifdef CONFIG_TCP_MD5SIG 1370 struct tcp_md5sig_key *key; 1371 int l3index; 1372 #endif 1373 struct flowi6 fl6; 1374 1375 if (skb->protocol == htons(ETH_P_IP)) { 1376 /* 1377 * v6 mapped 1378 */ 1379 1380 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst, 1381 req_unhash, own_req); 1382 1383 if (!newsk) 1384 return NULL; 1385 1386 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk); 1387 1388 newnp = tcp_inet6_sk(newsk); 1389 newtp = tcp_sk(newsk); 1390 1391 memcpy(newnp, np, sizeof(struct ipv6_pinfo)); 1392 1393 newnp->saddr = newsk->sk_v6_rcv_saddr; 1394 1395 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped; 1396 if (sk_is_mptcp(newsk)) 1397 mptcpv6_handle_mapped(newsk, true); 1398 newsk->sk_backlog_rcv = tcp_v4_do_rcv; 1399 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) 1400 newtp->af_specific = &tcp_sock_ipv6_mapped_specific; 1401 #endif 1402 1403 newnp->ipv6_mc_list = NULL; 1404 newnp->ipv6_ac_list = NULL; 1405 newnp->ipv6_fl_list = NULL; 1406 newnp->pktoptions = NULL; 1407 newnp->opt = NULL; 1408 newnp->mcast_oif = inet_iif(skb); 1409 newnp->mcast_hops = ip_hdr(skb)->ttl; 1410 newnp->rcv_flowinfo = 0; 1411 if (inet6_test_bit(REPFLOW, sk)) 1412 newnp->flow_label = 0; 1413 1414 /* 1415 * No need to charge this sock to the relevant IPv6 refcnt debug socks count 1416 * here, tcp_create_openreq_child now does this for us, see the comment in 1417 * that function for the gory details. -acme 1418 */ 1419 1420 /* It is tricky place. Until this moment IPv4 tcp 1421 worked with IPv6 icsk.icsk_af_ops. 1422 Sync it now. 1423 */ 1424 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie); 1425 1426 return newsk; 1427 } 1428 1429 ireq = inet_rsk(req); 1430 1431 if (sk_acceptq_is_full(sk)) 1432 goto exit_overflow; 1433 1434 if (!dst) { 1435 dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP); 1436 if (!dst) 1437 goto exit; 1438 } 1439 1440 newsk = tcp_create_openreq_child(sk, req, skb); 1441 if (!newsk) 1442 goto exit_nonewsk; 1443 1444 /* 1445 * No need to charge this sock to the relevant IPv6 refcnt debug socks 1446 * count here, tcp_create_openreq_child now does this for us, see the 1447 * comment in that function for the gory details. -acme 1448 */ 1449 1450 newsk->sk_gso_type = SKB_GSO_TCPV6; 1451 inet6_sk_rx_dst_set(newsk, skb); 1452 1453 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk); 1454 1455 newtp = tcp_sk(newsk); 1456 newinet = inet_sk(newsk); 1457 newnp = tcp_inet6_sk(newsk); 1458 1459 memcpy(newnp, np, sizeof(struct ipv6_pinfo)); 1460 1461 ip6_dst_store(newsk, dst, NULL, NULL); 1462 1463 newnp->saddr = ireq->ir_v6_loc_addr; 1464 1465 /* Now IPv6 options... 1466 1467 First: no IPv4 options. 
1468 */ 1469 newinet->inet_opt = NULL; 1470 newnp->ipv6_mc_list = NULL; 1471 newnp->ipv6_ac_list = NULL; 1472 newnp->ipv6_fl_list = NULL; 1473 1474 /* Clone RX bits */ 1475 newnp->rxopt.all = np->rxopt.all; 1476 1477 newnp->pktoptions = NULL; 1478 newnp->opt = NULL; 1479 newnp->mcast_oif = tcp_v6_iif(skb); 1480 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; 1481 newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb)); 1482 if (inet6_test_bit(REPFLOW, sk)) 1483 newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb)); 1484 1485 /* Set ToS of the new socket based upon the value of incoming SYN. 1486 * ECT bits are set later in tcp_init_transfer(). 1487 */ 1488 if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)) 1489 newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK; 1490 1491 /* Clone native IPv6 options from listening socket (if any) 1492 1493 Yes, keeping reference count would be much more clever, 1494 but we make one more one thing there: reattach optmem 1495 to newsk. 1496 */ 1497 opt = ireq->ipv6_opt; 1498 if (!opt) 1499 opt = rcu_dereference(np->opt); 1500 if (opt) { 1501 opt = ipv6_dup_options(newsk, opt); 1502 RCU_INIT_POINTER(newnp->opt, opt); 1503 } 1504 inet_csk(newsk)->icsk_ext_hdr_len = 0; 1505 if (opt) 1506 inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen + 1507 opt->opt_flen; 1508 1509 tcp_ca_openreq_child(newsk, dst); 1510 1511 tcp_sync_mss(newsk, dst_mtu(dst)); 1512 newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst)); 1513 1514 tcp_initialize_rcv_mss(newsk); 1515 1516 #ifdef CONFIG_TCP_MD5SIG 1517 l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif); 1518 1519 if (!tcp_rsk_used_ao(req)) { 1520 /* Copy over the MD5 key from the original socket */ 1521 key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index); 1522 if (key) { 1523 const union tcp_md5_addr *addr; 1524 1525 addr = (union tcp_md5_addr *)&newsk->sk_v6_daddr; 1526 if (tcp_md5_key_copy(newsk, addr, AF_INET6, 128, l3index, key)) 1527 goto put_and_exit; 1528 } 1529 } 1530 #endif 1531 #ifdef CONFIG_TCP_AO 1532 /* Copy over tcp_ao_info if any */ 1533 if (tcp_ao_copy_all_matching(sk, newsk, req, skb, AF_INET6)) 1534 goto put_and_exit; /* OOM */ 1535 #endif 1536 1537 if (__inet_inherit_port(sk, newsk) < 0) 1538 goto put_and_exit; 1539 *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), 1540 &found_dup_sk); 1541 if (*own_req) { 1542 tcp_move_syn(newtp, req); 1543 1544 /* Clone pktoptions received with SYN, if we own the req */ 1545 if (ireq->pktopts) { 1546 newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk); 1547 consume_skb(ireq->pktopts); 1548 ireq->pktopts = NULL; 1549 if (newnp->pktoptions) 1550 tcp_v6_restore_cb(newnp->pktoptions); 1551 } 1552 } else { 1553 if (!req_unhash && found_dup_sk) { 1554 /* This code path should only be executed in the 1555 * syncookie case only 1556 */ 1557 bh_unlock_sock(newsk); 1558 sock_put(newsk); 1559 newsk = NULL; 1560 } 1561 } 1562 1563 return newsk; 1564 1565 exit_overflow: 1566 __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); 1567 exit_nonewsk: 1568 dst_release(dst); 1569 exit: 1570 tcp_listendrop(sk); 1571 return NULL; 1572 put_and_exit: 1573 inet_csk_prepare_forced_close(newsk); 1574 tcp_done(newsk); 1575 goto exit; 1576 } 1577 1578 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *, 1579 u32)); 1580 /* The socket must have it's spinlock held when we get 1581 * here, unless it is a TCP_LISTEN socket. 
1582 * 1583 * We have a potential double-lock case here, so even when 1584 * doing backlog processing we use the BH locking scheme. 1585 * This is because we cannot sleep with the original spinlock 1586 * held. 1587 */ 1588 INDIRECT_CALLABLE_SCOPE 1589 int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) 1590 { 1591 struct ipv6_pinfo *np = tcp_inet6_sk(sk); 1592 struct sk_buff *opt_skb = NULL; 1593 enum skb_drop_reason reason; 1594 struct tcp_sock *tp; 1595 1596 /* Imagine: socket is IPv6. IPv4 packet arrives, 1597 goes to IPv4 receive handler and backlogged. 1598 From backlog it always goes here. Kerboom... 1599 Fortunately, tcp_rcv_established and rcv_established 1600 handle them correctly, but it is not case with 1601 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK 1602 */ 1603 1604 if (skb->protocol == htons(ETH_P_IP)) 1605 return tcp_v4_do_rcv(sk, skb); 1606 1607 /* 1608 * socket locking is here for SMP purposes as backlog rcv 1609 * is currently called with bh processing disabled. 1610 */ 1611 1612 /* Do Stevens' IPV6_PKTOPTIONS. 1613 1614 Yes, guys, it is the only place in our code, where we 1615 may make it not affecting IPv4. 1616 The rest of code is protocol independent, 1617 and I do not like idea to uglify IPv4. 1618 1619 Actually, all the idea behind IPV6_PKTOPTIONS 1620 looks not very well thought. For now we latch 1621 options, received in the last packet, enqueued 1622 by tcp. Feel free to propose better solution. 1623 --ANK (980728) 1624 */ 1625 if (np->rxopt.all && sk->sk_state != TCP_LISTEN) 1626 opt_skb = skb_clone_and_charge_r(skb, sk); 1627 1628 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ 1629 struct dst_entry *dst; 1630 1631 dst = rcu_dereference_protected(sk->sk_rx_dst, 1632 lockdep_sock_is_held(sk)); 1633 1634 sock_rps_save_rxhash(sk, skb); 1635 sk_mark_napi_id(sk, skb); 1636 if (dst) { 1637 if (sk->sk_rx_dst_ifindex != skb->skb_iif || 1638 INDIRECT_CALL_1(dst->ops->check, ip6_dst_check, 1639 dst, sk->sk_rx_dst_cookie) == NULL) { 1640 RCU_INIT_POINTER(sk->sk_rx_dst, NULL); 1641 dst_release(dst); 1642 } 1643 } 1644 1645 tcp_rcv_established(sk, skb); 1646 if (opt_skb) 1647 goto ipv6_pktoptions; 1648 return 0; 1649 } 1650 1651 if (tcp_checksum_complete(skb)) 1652 goto csum_err; 1653 1654 if (sk->sk_state == TCP_LISTEN) { 1655 struct sock *nsk = tcp_v6_cookie_check(sk, skb); 1656 1657 if (nsk != sk) { 1658 if (nsk) { 1659 reason = tcp_child_process(sk, nsk, skb); 1660 if (reason) 1661 goto reset; 1662 } 1663 return 0; 1664 } 1665 } else 1666 sock_rps_save_rxhash(sk, skb); 1667 1668 reason = tcp_rcv_state_process(sk, skb); 1669 if (reason) 1670 goto reset; 1671 if (opt_skb) 1672 goto ipv6_pktoptions; 1673 return 0; 1674 1675 reset: 1676 tcp_v6_send_reset(sk, skb, sk_rst_convert_drop_reason(reason)); 1677 discard: 1678 if (opt_skb) 1679 __kfree_skb(opt_skb); 1680 sk_skb_reason_drop(sk, skb, reason); 1681 return 0; 1682 csum_err: 1683 reason = SKB_DROP_REASON_TCP_CSUM; 1684 trace_tcp_bad_csum(skb); 1685 TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS); 1686 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); 1687 goto discard; 1688 1689 1690 ipv6_pktoptions: 1691 /* Do you ask, what is it? 1692 1693 1. skb was enqueued by tcp. 1694 2. skb is added to tail of read queue, rather than out of order. 1695 3. socket is not in passive state. 1696 4. Finally, it really contains options, which user wants to receive. 
1697 */ 1698 tp = tcp_sk(sk); 1699 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt && 1700 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) { 1701 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo) 1702 WRITE_ONCE(np->mcast_oif, tcp_v6_iif(opt_skb)); 1703 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) 1704 WRITE_ONCE(np->mcast_hops, 1705 ipv6_hdr(opt_skb)->hop_limit); 1706 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass) 1707 np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb)); 1708 if (inet6_test_bit(REPFLOW, sk)) 1709 np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb)); 1710 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) { 1711 tcp_v6_restore_cb(opt_skb); 1712 opt_skb = xchg(&np->pktoptions, opt_skb); 1713 } else { 1714 __kfree_skb(opt_skb); 1715 opt_skb = xchg(&np->pktoptions, NULL); 1716 } 1717 } 1718 1719 consume_skb(opt_skb); 1720 return 0; 1721 } 1722 1723 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr, 1724 const struct tcphdr *th) 1725 { 1726 /* This is tricky: we move IP6CB at its correct location into 1727 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because 1728 * _decode_session6() uses IP6CB(). 1729 * barrier() makes sure compiler won't play aliasing games. 1730 */ 1731 memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb), 1732 sizeof(struct inet6_skb_parm)); 1733 barrier(); 1734 1735 TCP_SKB_CB(skb)->seq = ntohl(th->seq); 1736 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + 1737 skb->len - th->doff*4); 1738 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); 1739 TCP_SKB_CB(skb)->tcp_flags = tcp_flags_ntohs(th); 1740 TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr); 1741 TCP_SKB_CB(skb)->sacked = 0; 1742 TCP_SKB_CB(skb)->has_rxtstamp = 1743 skb->tstamp || skb_hwtstamps(skb)->hwtstamp; 1744 } 1745 1746 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) 1747 { 1748 struct net *net = dev_net_rcu(skb->dev); 1749 enum skb_drop_reason drop_reason; 1750 enum tcp_tw_status tw_status; 1751 int sdif = inet6_sdif(skb); 1752 int dif = inet6_iif(skb); 1753 const struct tcphdr *th; 1754 const struct ipv6hdr *hdr; 1755 struct sock *sk = NULL; 1756 bool refcounted; 1757 int ret; 1758 u32 isn; 1759 1760 drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; 1761 if (skb->pkt_type != PACKET_HOST) 1762 goto discard_it; 1763 1764 /* 1765 * Count it even if it's bad. 
1766 */ 1767 __TCP_INC_STATS(net, TCP_MIB_INSEGS); 1768 1769 if (!pskb_may_pull(skb, sizeof(struct tcphdr))) 1770 goto discard_it; 1771 1772 th = (const struct tcphdr *)skb->data; 1773 1774 if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) { 1775 drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL; 1776 goto bad_packet; 1777 } 1778 if (!pskb_may_pull(skb, th->doff*4)) 1779 goto discard_it; 1780 1781 if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo)) 1782 goto csum_error; 1783 1784 th = (const struct tcphdr *)skb->data; 1785 hdr = ipv6_hdr(skb); 1786 1787 lookup: 1788 sk = __inet6_lookup_skb(skb, __tcp_hdrlen(th), 1789 th->source, th->dest, inet6_iif(skb), sdif, 1790 &refcounted); 1791 if (!sk) 1792 goto no_tcp_socket; 1793 1794 if (sk->sk_state == TCP_TIME_WAIT) 1795 goto do_time_wait; 1796 1797 if (sk->sk_state == TCP_NEW_SYN_RECV) { 1798 struct request_sock *req = inet_reqsk(sk); 1799 bool req_stolen = false; 1800 struct sock *nsk; 1801 1802 sk = req->rsk_listener; 1803 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) 1804 drop_reason = SKB_DROP_REASON_XFRM_POLICY; 1805 else 1806 drop_reason = tcp_inbound_hash(sk, req, skb, 1807 &hdr->saddr, &hdr->daddr, 1808 AF_INET6, dif, sdif); 1809 if (drop_reason) { 1810 sk_drops_skbadd(sk, skb); 1811 reqsk_put(req); 1812 goto discard_it; 1813 } 1814 if (tcp_checksum_complete(skb)) { 1815 reqsk_put(req); 1816 goto csum_error; 1817 } 1818 if (unlikely(sk->sk_state != TCP_LISTEN)) { 1819 nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb); 1820 if (!nsk) { 1821 inet_csk_reqsk_queue_drop_and_put(sk, req); 1822 goto lookup; 1823 } 1824 sk = nsk; 1825 /* reuseport_migrate_sock() has already held one sk_refcnt 1826 * before returning. 1827 */ 1828 } else { 1829 sock_hold(sk); 1830 } 1831 refcounted = true; 1832 nsk = NULL; 1833 if (!tcp_filter(sk, skb, &drop_reason)) { 1834 th = (const struct tcphdr *)skb->data; 1835 hdr = ipv6_hdr(skb); 1836 tcp_v6_fill_cb(skb, hdr, th); 1837 nsk = tcp_check_req(sk, skb, req, false, &req_stolen, 1838 &drop_reason); 1839 } 1840 if (!nsk) { 1841 reqsk_put(req); 1842 if (req_stolen) { 1843 /* Another cpu got exclusive access to req 1844 * and created a full blown socket. 1845 * Try to feed this packet to this socket 1846 * instead of discarding it. 
1847 */ 1848 tcp_v6_restore_cb(skb); 1849 sock_put(sk); 1850 goto lookup; 1851 } 1852 goto discard_and_relse; 1853 } 1854 nf_reset_ct(skb); 1855 if (nsk == sk) { 1856 reqsk_put(req); 1857 tcp_v6_restore_cb(skb); 1858 } else { 1859 drop_reason = tcp_child_process(sk, nsk, skb); 1860 if (drop_reason) { 1861 enum sk_rst_reason rst_reason; 1862 1863 rst_reason = sk_rst_convert_drop_reason(drop_reason); 1864 tcp_v6_send_reset(nsk, skb, rst_reason); 1865 goto discard_and_relse; 1866 } 1867 sock_put(sk); 1868 return 0; 1869 } 1870 } 1871 1872 process: 1873 if (static_branch_unlikely(&ip6_min_hopcount)) { 1874 /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */ 1875 if (unlikely(hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount))) { 1876 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); 1877 drop_reason = SKB_DROP_REASON_TCP_MINTTL; 1878 goto discard_and_relse; 1879 } 1880 } 1881 1882 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) { 1883 drop_reason = SKB_DROP_REASON_XFRM_POLICY; 1884 goto discard_and_relse; 1885 } 1886 1887 drop_reason = tcp_inbound_hash(sk, NULL, skb, &hdr->saddr, &hdr->daddr, 1888 AF_INET6, dif, sdif); 1889 if (drop_reason) 1890 goto discard_and_relse; 1891 1892 nf_reset_ct(skb); 1893 1894 if (tcp_filter(sk, skb, &drop_reason)) 1895 goto discard_and_relse; 1896 1897 th = (const struct tcphdr *)skb->data; 1898 hdr = ipv6_hdr(skb); 1899 tcp_v6_fill_cb(skb, hdr, th); 1900 1901 skb->dev = NULL; 1902 1903 if (sk->sk_state == TCP_LISTEN) { 1904 ret = tcp_v6_do_rcv(sk, skb); 1905 goto put_and_return; 1906 } 1907 1908 sk_incoming_cpu_update(sk); 1909 1910 bh_lock_sock_nested(sk); 1911 tcp_segs_in(tcp_sk(sk), skb); 1912 ret = 0; 1913 if (!sock_owned_by_user(sk)) { 1914 ret = tcp_v6_do_rcv(sk, skb); 1915 } else { 1916 if (tcp_add_backlog(sk, skb, &drop_reason)) 1917 goto discard_and_relse; 1918 } 1919 bh_unlock_sock(sk); 1920 put_and_return: 1921 if (refcounted) 1922 sock_put(sk); 1923 return ret ? 
-1 : 0; 1924 1925 no_tcp_socket: 1926 drop_reason = SKB_DROP_REASON_NO_SOCKET; 1927 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) 1928 goto discard_it; 1929 1930 tcp_v6_fill_cb(skb, hdr, th); 1931 1932 if (tcp_checksum_complete(skb)) { 1933 csum_error: 1934 drop_reason = SKB_DROP_REASON_TCP_CSUM; 1935 trace_tcp_bad_csum(skb); 1936 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS); 1937 bad_packet: 1938 __TCP_INC_STATS(net, TCP_MIB_INERRS); 1939 } else { 1940 tcp_v6_send_reset(NULL, skb, sk_rst_convert_drop_reason(drop_reason)); 1941 } 1942 1943 discard_it: 1944 SKB_DR_OR(drop_reason, NOT_SPECIFIED); 1945 sk_skb_reason_drop(sk, skb, drop_reason); 1946 return 0; 1947 1948 discard_and_relse: 1949 sk_drops_skbadd(sk, skb); 1950 if (refcounted) 1951 sock_put(sk); 1952 goto discard_it; 1953 1954 do_time_wait: 1955 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { 1956 drop_reason = SKB_DROP_REASON_XFRM_POLICY; 1957 inet_twsk_put(inet_twsk(sk)); 1958 goto discard_it; 1959 } 1960 1961 tcp_v6_fill_cb(skb, hdr, th); 1962 1963 if (tcp_checksum_complete(skb)) { 1964 inet_twsk_put(inet_twsk(sk)); 1965 goto csum_error; 1966 } 1967 1968 tw_status = tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn, 1969 &drop_reason); 1970 switch (tw_status) { 1971 case TCP_TW_SYN: 1972 { 1973 struct sock *sk2; 1974 1975 sk2 = inet6_lookup_listener(net, skb, __tcp_hdrlen(th), 1976 &ipv6_hdr(skb)->saddr, th->source, 1977 &ipv6_hdr(skb)->daddr, 1978 ntohs(th->dest), 1979 tcp_v6_iif_l3_slave(skb), 1980 sdif); 1981 if (sk2) { 1982 struct inet_timewait_sock *tw = inet_twsk(sk); 1983 inet_twsk_deschedule_put(tw); 1984 sk = sk2; 1985 tcp_v6_restore_cb(skb); 1986 refcounted = false; 1987 __this_cpu_write(tcp_tw_isn, isn); 1988 goto process; 1989 } 1990 } 1991 /* to ACK */ 1992 fallthrough; 1993 case TCP_TW_ACK: 1994 case TCP_TW_ACK_OOW: 1995 tcp_v6_timewait_ack(sk, skb, tw_status); 1996 break; 1997 case TCP_TW_RST: 1998 tcp_v6_send_reset(sk, skb, SK_RST_REASON_TCP_TIMEWAIT_SOCKET); 1999 inet_twsk_deschedule_put(inet_twsk(sk)); 2000 goto discard_it; 2001 case TCP_TW_SUCCESS: 2002 ; 2003 } 2004 goto discard_it; 2005 } 2006 2007 void tcp_v6_early_demux(struct sk_buff *skb) 2008 { 2009 struct net *net = dev_net_rcu(skb->dev); 2010 const struct ipv6hdr *hdr; 2011 const struct tcphdr *th; 2012 struct sock *sk; 2013 2014 if (skb->pkt_type != PACKET_HOST) 2015 return; 2016 2017 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr))) 2018 return; 2019 2020 hdr = ipv6_hdr(skb); 2021 th = tcp_hdr(skb); 2022 2023 if (th->doff < sizeof(struct tcphdr) / 4) 2024 return; 2025 2026 /* Note : We use inet6_iif() here, not tcp_v6_iif() */ 2027 sk = __inet6_lookup_established(net, &hdr->saddr, th->source, 2028 &hdr->daddr, ntohs(th->dest), 2029 inet6_iif(skb), inet6_sdif(skb)); 2030 if (sk) { 2031 skb->sk = sk; 2032 skb->destructor = sock_edemux; 2033 if (sk_fullsock(sk)) { 2034 struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst); 2035 2036 if (dst) 2037 dst = dst_check(dst, sk->sk_rx_dst_cookie); 2038 if (dst && 2039 sk->sk_rx_dst_ifindex == skb->skb_iif) 2040 skb_dst_set_noref(skb, dst); 2041 } 2042 } 2043 } 2044 2045 static struct timewait_sock_ops tcp6_timewait_sock_ops = { 2046 .twsk_obj_size = sizeof(struct tcp6_timewait_sock), 2047 }; 2048 2049 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb) 2050 { 2051 __tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr); 2052 } 2053 2054 const struct inet_connection_sock_af_ops ipv6_specific = { 2055 .queue_xmit = inet6_csk_xmit, 
2056 .send_check = tcp_v6_send_check, 2057 .rebuild_header = inet6_sk_rebuild_header, 2058 .sk_rx_dst_set = inet6_sk_rx_dst_set, 2059 .conn_request = tcp_v6_conn_request, 2060 .syn_recv_sock = tcp_v6_syn_recv_sock, 2061 .net_header_len = sizeof(struct ipv6hdr), 2062 .setsockopt = ipv6_setsockopt, 2063 .getsockopt = ipv6_getsockopt, 2064 .mtu_reduced = tcp_v6_mtu_reduced, 2065 }; 2066 2067 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) 2068 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = { 2069 #ifdef CONFIG_TCP_MD5SIG 2070 .md5_lookup = tcp_v6_md5_lookup, 2071 .calc_md5_hash = tcp_v6_md5_hash_skb, 2072 .md5_parse = tcp_v6_parse_md5_keys, 2073 #endif 2074 #ifdef CONFIG_TCP_AO 2075 .ao_lookup = tcp_v6_ao_lookup, 2076 .calc_ao_hash = tcp_v6_ao_hash_skb, 2077 .ao_parse = tcp_v6_parse_ao, 2078 .ao_calc_key_sk = tcp_v6_ao_calc_key_sk, 2079 #endif 2080 }; 2081 #endif 2082 2083 /* 2084 * TCP over IPv4 via INET6 API 2085 */ 2086 static const struct inet_connection_sock_af_ops ipv6_mapped = { 2087 .queue_xmit = ip_queue_xmit, 2088 .send_check = tcp_v4_send_check, 2089 .rebuild_header = inet_sk_rebuild_header, 2090 .sk_rx_dst_set = inet_sk_rx_dst_set, 2091 .conn_request = tcp_v6_conn_request, 2092 .syn_recv_sock = tcp_v6_syn_recv_sock, 2093 .net_header_len = sizeof(struct iphdr), 2094 .setsockopt = ipv6_setsockopt, 2095 .getsockopt = ipv6_getsockopt, 2096 .mtu_reduced = tcp_v4_mtu_reduced, 2097 }; 2098 2099 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) 2100 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = { 2101 #ifdef CONFIG_TCP_MD5SIG 2102 .md5_lookup = tcp_v4_md5_lookup, 2103 .calc_md5_hash = tcp_v4_md5_hash_skb, 2104 .md5_parse = tcp_v6_parse_md5_keys, 2105 #endif 2106 #ifdef CONFIG_TCP_AO 2107 .ao_lookup = tcp_v6_ao_lookup, 2108 .calc_ao_hash = tcp_v4_ao_hash_skb, 2109 .ao_parse = tcp_v6_parse_ao, 2110 .ao_calc_key_sk = tcp_v4_ao_calc_key_sk, 2111 #endif 2112 }; 2113 #endif 2114 2115 /* NOTE: A lot of things set to zero explicitly by call to 2116 * sk_alloc() so need not be done here. 2117 */ 2118 static int tcp_v6_init_sock(struct sock *sk) 2119 { 2120 struct inet_connection_sock *icsk = inet_csk(sk); 2121 2122 tcp_init_sock(sk); 2123 2124 icsk->icsk_af_ops = &ipv6_specific; 2125 2126 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) 2127 tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific; 2128 #endif 2129 2130 return 0; 2131 } 2132 2133 #ifdef CONFIG_PROC_FS 2134 /* Proc filesystem TCPv6 sock list dumping. */ 2135 static void get_openreq6(struct seq_file *seq, 2136 const struct request_sock *req, int i) 2137 { 2138 long ttd = req->rsk_timer.expires - jiffies; 2139 const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr; 2140 const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr; 2141 2142 if (ttd < 0) 2143 ttd = 0; 2144 2145 seq_printf(seq, 2146 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " 2147 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n", 2148 i, 2149 src->s6_addr32[0], src->s6_addr32[1], 2150 src->s6_addr32[2], src->s6_addr32[3], 2151 inet_rsk(req)->ir_num, 2152 dest->s6_addr32[0], dest->s6_addr32[1], 2153 dest->s6_addr32[2], dest->s6_addr32[3], 2154 ntohs(inet_rsk(req)->ir_rmt_port), 2155 TCP_SYN_RECV, 2156 0, 0, /* could print option size, but that is af dependent. 
*/ 2157 1, /* timers active (only the expire timer) */ 2158 jiffies_to_clock_t(ttd), 2159 req->num_timeout, 2160 from_kuid_munged(seq_user_ns(seq), 2161 sk_uid(req->rsk_listener)), 2162 0, /* non standard timer */ 2163 0, /* open_requests have no inode */ 2164 0, req); 2165 } 2166 2167 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) 2168 { 2169 const struct in6_addr *dest, *src; 2170 __u16 destp, srcp; 2171 int timer_active; 2172 unsigned long timer_expires; 2173 const struct inet_sock *inet = inet_sk(sp); 2174 const struct tcp_sock *tp = tcp_sk(sp); 2175 const struct inet_connection_sock *icsk = inet_csk(sp); 2176 const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq; 2177 u8 icsk_pending; 2178 int rx_queue; 2179 int state; 2180 2181 dest = &sp->sk_v6_daddr; 2182 src = &sp->sk_v6_rcv_saddr; 2183 destp = ntohs(inet->inet_dport); 2184 srcp = ntohs(inet->inet_sport); 2185 2186 icsk_pending = smp_load_acquire(&icsk->icsk_pending); 2187 if (icsk_pending == ICSK_TIME_RETRANS || 2188 icsk_pending == ICSK_TIME_REO_TIMEOUT || 2189 icsk_pending == ICSK_TIME_LOSS_PROBE) { 2190 timer_active = 1; 2191 timer_expires = icsk_timeout(icsk); 2192 } else if (icsk_pending == ICSK_TIME_PROBE0) { 2193 timer_active = 4; 2194 timer_expires = icsk_timeout(icsk); 2195 } else if (timer_pending(&sp->sk_timer)) { 2196 timer_active = 2; 2197 timer_expires = sp->sk_timer.expires; 2198 } else { 2199 timer_active = 0; 2200 timer_expires = jiffies; 2201 } 2202 2203 state = inet_sk_state_load(sp); 2204 if (state == TCP_LISTEN) 2205 rx_queue = READ_ONCE(sp->sk_ack_backlog); 2206 else 2207 /* Because we don't lock the socket, 2208 * we might find a transient negative value. 2209 */ 2210 rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) - 2211 READ_ONCE(tp->copied_seq), 0); 2212 2213 seq_printf(seq, 2214 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " 2215 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n", 2216 i, 2217 src->s6_addr32[0], src->s6_addr32[1], 2218 src->s6_addr32[2], src->s6_addr32[3], srcp, 2219 dest->s6_addr32[0], dest->s6_addr32[1], 2220 dest->s6_addr32[2], dest->s6_addr32[3], destp, 2221 state, 2222 READ_ONCE(tp->write_seq) - tp->snd_una, 2223 rx_queue, 2224 timer_active, 2225 jiffies_delta_to_clock_t(timer_expires - jiffies), 2226 READ_ONCE(icsk->icsk_retransmits), 2227 from_kuid_munged(seq_user_ns(seq), sk_uid(sp)), 2228 READ_ONCE(icsk->icsk_probes_out), 2229 sock_i_ino(sp), 2230 refcount_read(&sp->sk_refcnt), sp, 2231 jiffies_to_clock_t(icsk->icsk_rto), 2232 jiffies_to_clock_t(icsk->icsk_ack.ato), 2233 (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp), 2234 tcp_snd_cwnd(tp), 2235 state == TCP_LISTEN ? 2236 fastopenq->max_qlen : 2237 (tcp_in_initial_slowstart(tp) ? 
-1 : tp->snd_ssthresh) 2238 ); 2239 } 2240 2241 static void get_timewait6_sock(struct seq_file *seq, 2242 struct inet_timewait_sock *tw, int i) 2243 { 2244 long delta = tw->tw_timer.expires - jiffies; 2245 const struct in6_addr *dest, *src; 2246 __u16 destp, srcp; 2247 2248 dest = &tw->tw_v6_daddr; 2249 src = &tw->tw_v6_rcv_saddr; 2250 destp = ntohs(tw->tw_dport); 2251 srcp = ntohs(tw->tw_sport); 2252 2253 seq_printf(seq, 2254 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " 2255 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n", 2256 i, 2257 src->s6_addr32[0], src->s6_addr32[1], 2258 src->s6_addr32[2], src->s6_addr32[3], srcp, 2259 dest->s6_addr32[0], dest->s6_addr32[1], 2260 dest->s6_addr32[2], dest->s6_addr32[3], destp, 2261 READ_ONCE(tw->tw_substate), 0, 0, 2262 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0, 2263 refcount_read(&tw->tw_refcnt), tw); 2264 } 2265 2266 static int tcp6_seq_show(struct seq_file *seq, void *v) 2267 { 2268 struct tcp_iter_state *st; 2269 struct sock *sk = v; 2270 2271 if (v == SEQ_START_TOKEN) { 2272 seq_puts(seq, 2273 " sl " 2274 "local_address " 2275 "remote_address " 2276 "st tx_queue rx_queue tr tm->when retrnsmt" 2277 " uid timeout inode\n"); 2278 goto out; 2279 } 2280 st = seq->private; 2281 2282 if (sk->sk_state == TCP_TIME_WAIT) 2283 get_timewait6_sock(seq, v, st->num); 2284 else if (sk->sk_state == TCP_NEW_SYN_RECV) 2285 get_openreq6(seq, v, st->num); 2286 else 2287 get_tcp6_sock(seq, v, st->num); 2288 out: 2289 return 0; 2290 } 2291 2292 static const struct seq_operations tcp6_seq_ops = { 2293 .show = tcp6_seq_show, 2294 .start = tcp_seq_start, 2295 .next = tcp_seq_next, 2296 .stop = tcp_seq_stop, 2297 }; 2298 2299 static struct tcp_seq_afinfo tcp6_seq_afinfo = { 2300 .family = AF_INET6, 2301 }; 2302 2303 int __net_init tcp6_proc_init(struct net *net) 2304 { 2305 if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops, 2306 sizeof(struct tcp_iter_state), &tcp6_seq_afinfo)) 2307 return -ENOMEM; 2308 return 0; 2309 } 2310 2311 void tcp6_proc_exit(struct net *net) 2312 { 2313 remove_proc_entry("tcp6", net->proc_net); 2314 } 2315 #endif 2316 2317 struct proto tcpv6_prot = { 2318 .name = "TCPv6", 2319 .owner = THIS_MODULE, 2320 .close = tcp_close, 2321 .pre_connect = tcp_v6_pre_connect, 2322 .connect = tcp_v6_connect, 2323 .disconnect = tcp_disconnect, 2324 .accept = inet_csk_accept, 2325 .ioctl = tcp_ioctl, 2326 .init = tcp_v6_init_sock, 2327 .destroy = tcp_v4_destroy_sock, 2328 .shutdown = tcp_shutdown, 2329 .setsockopt = tcp_setsockopt, 2330 .getsockopt = tcp_getsockopt, 2331 .bpf_bypass_getsockopt = tcp_bpf_bypass_getsockopt, 2332 .keepalive = tcp_set_keepalive, 2333 .recvmsg = tcp_recvmsg, 2334 .sendmsg = tcp_sendmsg, 2335 .splice_eof = tcp_splice_eof, 2336 .backlog_rcv = tcp_v6_do_rcv, 2337 .release_cb = tcp_release_cb, 2338 .hash = inet6_hash, 2339 .unhash = inet_unhash, 2340 .get_port = inet_csk_get_port, 2341 .put_port = inet_put_port, 2342 #ifdef CONFIG_BPF_SYSCALL 2343 .psock_update_sk_prot = tcp_bpf_update_proto, 2344 #endif 2345 .enter_memory_pressure = tcp_enter_memory_pressure, 2346 .leave_memory_pressure = tcp_leave_memory_pressure, 2347 .stream_memory_free = tcp_stream_memory_free, 2348 .sockets_allocated = &tcp_sockets_allocated, 2349 2350 .memory_allocated = &net_aligned_data.tcp_memory_allocated, 2351 .per_cpu_fw_alloc = &tcp_memory_per_cpu_fw_alloc, 2352 2353 .memory_pressure = &tcp_memory_pressure, 2354 .sysctl_mem = sysctl_tcp_mem, 2355 .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem), 2356 
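	/* As with sysctl_wmem_offset above, this offset lets generic socket
	 * code locate the per-netns buffer limits; TCPv6 reuses the fields
	 * under struct net's ipv4 member (ipv4.sysctl_tcp_rmem) rather than
	 * keeping an IPv6-specific copy.
	 */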
.sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem), 2357 .max_header = MAX_TCP_HEADER, 2358 .obj_size = sizeof(struct tcp6_sock), 2359 .ipv6_pinfo_offset = offsetof(struct tcp6_sock, inet6), 2360 .slab_flags = SLAB_TYPESAFE_BY_RCU, 2361 .twsk_prot = &tcp6_timewait_sock_ops, 2362 .rsk_prot = &tcp6_request_sock_ops, 2363 .h.hashinfo = NULL, 2364 .no_autobind = true, 2365 .diag_destroy = tcp_abort, 2366 }; 2367 EXPORT_SYMBOL_GPL(tcpv6_prot); 2368 2369 2370 static struct inet_protosw tcpv6_protosw = { 2371 .type = SOCK_STREAM, 2372 .protocol = IPPROTO_TCP, 2373 .prot = &tcpv6_prot, 2374 .ops = &inet6_stream_ops, 2375 .flags = INET_PROTOSW_PERMANENT | 2376 INET_PROTOSW_ICSK, 2377 }; 2378 2379 static int __net_init tcpv6_net_init(struct net *net) 2380 { 2381 int res; 2382 2383 res = inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6, 2384 SOCK_RAW, IPPROTO_TCP, net); 2385 if (!res) 2386 net->ipv6.tcp_sk->sk_clockid = CLOCK_MONOTONIC; 2387 2388 return res; 2389 } 2390 2391 static void __net_exit tcpv6_net_exit(struct net *net) 2392 { 2393 inet_ctl_sock_destroy(net->ipv6.tcp_sk); 2394 } 2395 2396 static struct pernet_operations tcpv6_net_ops = { 2397 .init = tcpv6_net_init, 2398 .exit = tcpv6_net_exit, 2399 }; 2400 2401 int __init tcpv6_init(void) 2402 { 2403 int ret; 2404 2405 net_hotdata.tcpv6_protocol = (struct inet6_protocol) { 2406 .handler = tcp_v6_rcv, 2407 .err_handler = tcp_v6_err, 2408 .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_FINAL, 2409 }; 2410 ret = inet6_add_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP); 2411 if (ret) 2412 goto out; 2413 2414 /* register inet6 protocol */ 2415 ret = inet6_register_protosw(&tcpv6_protosw); 2416 if (ret) 2417 goto out_tcpv6_protocol; 2418 2419 ret = register_pernet_subsys(&tcpv6_net_ops); 2420 if (ret) 2421 goto out_tcpv6_protosw; 2422 2423 ret = mptcpv6_init(); 2424 if (ret) 2425 goto out_tcpv6_pernet_subsys; 2426 2427 out: 2428 return ret; 2429 2430 out_tcpv6_pernet_subsys: 2431 unregister_pernet_subsys(&tcpv6_net_ops); 2432 out_tcpv6_protosw: 2433 inet6_unregister_protosw(&tcpv6_protosw); 2434 out_tcpv6_protocol: 2435 inet6_del_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP); 2436 goto out; 2437 } 2438 2439 void tcpv6_exit(void) 2440 { 2441 unregister_pernet_subsys(&tcpv6_net_ops); 2442 inet6_unregister_protosw(&tcpv6_protosw); 2443 inet6_del_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP); 2444 } 2445
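
/* Summary of the registration performed above (derived from tcpv6_init() and
 * tcpv6_exit(); the sample /proc line below is only a sketch based on the
 * seq_printf() format strings in get_tcp6_sock()/tcp6_seq_show(), not a
 * separately guaranteed layout):
 *
 *   tcpv6_init():
 *     1. inet6_add_protocol()      - hook tcp_v6_rcv()/tcp_v6_err() for IPPROTO_TCP
 *     2. inet6_register_protosw()  - expose SOCK_STREAM/IPPROTO_TCP via tcpv6_prot
 *     3. register_pernet_subsys()  - per-netns control socket (tcpv6_net_init())
 *     4. mptcpv6_init()
 *   Errors unwind through the out_* labels; tcpv6_exit() undoes steps 3..1
 *   in reverse order.
 *
 *   Illustrative /proc/net/tcp6 row for a listener on port 8080 (0x1F90),
 *   state 0A (TCP_LISTEN); trailing fields elided:
 *
 *     0: 00000000000000000000000000000000:1F90 00000000000000000000000000000000:0000 0A 00000000:00000000 00:00000000 00000000  1000 0 12345 ...
 */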