// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	TCP over IPv6
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on:
 *	linux/net/ipv4/tcp.c
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 */

#include <linux/bottom_half.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
#include <linux/indirect_call_wrapper.h>

#include <net/aligned_data.h>
#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/inet6_connection_sock.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/snmp.h>
#include <net/dsfield.h>
#include <net/timewait_sock.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>
#include <net/hotdata.h>
#include <net/busy_poll.h>
#include <net/rstreason.h>
#include <net/psp.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <crypto/hash.h>
#include <linux/scatterlist.h>

#include <trace/events/tcp.h>

static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb,
			      enum sk_rst_reason reason);
static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req);

INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);

static const struct inet_connection_sock_af_ops ipv6_mapped;
const struct inet_connection_sock_af_ops ipv6_specific;
#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
#endif

/* Helper returning the inet6 address from a given tcp socket.
 * It can be used in TCP stack instead of inet6_sk(sk).
 * This avoids a dereference and allows compiler optimizations.
 * It is a specialized version of inet6_sk_generic().
 */
#define tcp_inet6_sk(sk) (&container_of_const(tcp_sk(sk),	\
					      struct tcp6_sock, tcp)->inet6)

static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	if (dst && dst_hold_safe(dst)) {
		rcu_assign_pointer(sk->sk_rx_dst, dst);
		sk->sk_rx_dst_ifindex = skb->skb_iif;
		sk->sk_rx_dst_cookie = rt6_get_cookie(dst_rt6_info(dst));
	}
}

static u32 tcp_v6_init_seq(const struct sk_buff *skb)
{
	return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
				ipv6_hdr(skb)->saddr.s6_addr32,
				tcp_hdr(skb)->dest,
				tcp_hdr(skb)->source);
}

static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
{
	return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
				   ipv6_hdr(skb)->saddr.s6_addr32);
}

static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
			      int addr_len)
{
	/* This check is replicated from tcp_v6_connect() and intended to
	 * prevent BPF program called below from accessing bytes that are out
	 * of the bound specified by user in addr_len.
	 */
	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	sock_owned_by_me(sk);

	return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, &addr_len);
}

static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
			  int addr_len)
{
	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct in6_addr *saddr = NULL, *final_p, final;
	struct inet_timewait_death_row *tcp_death_row;
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6_txoptions *opt;
	struct dst_entry *dst;
	struct flowi6 fl6;
	int addr_type;
	int err;

	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	if (usin->sin6_family != AF_INET6)
		return -EAFNOSUPPORT;

	memset(&fl6, 0, sizeof(fl6));

	if (inet6_test_bit(SNDFLOW, sk)) {
		fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
		IP6_ECN_flow_init(fl6.flowlabel);
		if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
			struct ip6_flowlabel *flowlabel;
			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
			if (IS_ERR(flowlabel))
				return -EINVAL;
			fl6_sock_release(flowlabel);
		}
	}

	/*
	 *	connect() to INADDR_ANY means loopback (BSD'ism).
	 */

	if (ipv6_addr_any(&usin->sin6_addr)) {
		if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
			ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
					       &usin->sin6_addr);
		else
			usin->sin6_addr = in6addr_loopback;
	}

	addr_type = ipv6_addr_type(&usin->sin6_addr);

	if (addr_type & IPV6_ADDR_MULTICAST)
		return -ENETUNREACH;

	if (addr_type&IPV6_ADDR_LINKLOCAL) {
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			/* If interface is set while binding, indices
			 * must coincide.
			 */
			if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
				return -EINVAL;

			sk->sk_bound_dev_if = usin->sin6_scope_id;
		}

		/* Connect to link-local address requires an interface */
		if (!sk->sk_bound_dev_if)
			return -EINVAL;
	}

	if (tp->rx_opt.ts_recent_stamp &&
	    !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		WRITE_ONCE(tp->write_seq, 0);
	}

	sk->sk_v6_daddr = usin->sin6_addr;
	np->flow_label = fl6.flowlabel;

	/*
	 *	TCP over IPv4
	 */

	if (addr_type & IPV6_ADDR_MAPPED) {
		u32 exthdrlen = icsk->icsk_ext_hdr_len;
		struct sockaddr_in sin;

		if (ipv6_only_sock(sk))
			return -ENETUNREACH;

		sin.sin_family = AF_INET;
		sin.sin_port = usin->sin6_port;
		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

		/* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
		WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped);
		if (sk_is_mptcp(sk))
			mptcpv6_handle_mapped(sk, true);
		sk->sk_backlog_rcv = tcp_v4_do_rcv;
#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

		if (err) {
			icsk->icsk_ext_hdr_len = exthdrlen;
			/* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
			WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific);
			if (sk_is_mptcp(sk))
				mptcpv6_handle_mapped(sk, false);
			sk->sk_backlog_rcv = tcp_v6_do_rcv;
#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
			tp->af_specific = &tcp_sock_ipv6_specific;
#endif
			goto failure;
		}
		np->saddr = sk->sk_v6_rcv_saddr;

		return err;
	}

	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
		saddr = &sk->sk_v6_rcv_saddr;

	fl6.flowi6_proto = IPPROTO_TCP;
	fl6.daddr = sk->sk_v6_daddr;
	fl6.saddr = saddr ? *saddr : np->saddr;
	fl6.flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
	fl6.flowi6_oif = sk->sk_bound_dev_if;
	fl6.flowi6_mark = sk->sk_mark;
	fl6.fl6_dport = usin->sin6_port;
	fl6.fl6_sport = inet->inet_sport;
	if (IS_ENABLED(CONFIG_IP_ROUTE_MULTIPATH) && !fl6.fl6_sport)
		fl6.flowi6_flags = FLOWI_FLAG_ANY_SPORT;
	fl6.flowi6_uid = sk_uid(sk);

	opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
	final_p = fl6_update_dst(&fl6, opt, &final);

	security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));

	dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p);
	if (IS_ERR(dst)) {
		err = PTR_ERR(dst);
		goto failure;
	}

	tp->tcp_usec_ts = dst_tcp_usec_ts(dst);
	tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;

	if (!saddr) {
		saddr = &fl6.saddr;

		err = inet_bhash2_update_saddr(sk, saddr, AF_INET6);
		if (err)
			goto failure;
	}

	/* set the source address */
	np->saddr = *saddr;
	inet->inet_rcv_saddr = LOOPBACK4_IPV6;

	sk->sk_gso_type = SKB_GSO_TCPV6;
	ip6_dst_store(sk, dst, false, false);

	icsk->icsk_ext_hdr_len = psp_sk_overhead(sk);
	if (opt)
		icsk->icsk_ext_hdr_len += opt->opt_flen +
					  opt->opt_nflen;

	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

	inet->inet_dport = usin->sin6_port;

	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet6_hash_connect(tcp_death_row, sk);
	if (err)
		goto late_failure;

	sk_set_txhash(sk);

	if (likely(!tp->repair)) {
		if (!tp->write_seq)
			WRITE_ONCE(tp->write_seq,
				   secure_tcpv6_seq(np->saddr.s6_addr32,
						    sk->sk_v6_daddr.s6_addr32,
						    inet->inet_sport,
						    inet->inet_dport));
		tp->tsoffset = secure_tcpv6_ts_off(net, np->saddr.s6_addr32,
						   sk->sk_v6_daddr.s6_addr32);
	}

	if (tcp_fastopen_defer_connect(sk, &err))
		return err;
	if (err)
		goto late_failure;

	err = tcp_connect(sk);
	if (err)
		goto late_failure;

	return 0;

late_failure:
	tcp_set_state(sk, TCP_CLOSE);
	inet_bhash2_reset_saddr(sk);
failure:
	inet->inet_dport = 0;
	sk->sk_route_caps = 0;
	return err;
}

static void tcp_v6_mtu_reduced(struct sock *sk)
{
	struct dst_entry *dst;
	u32 mtu;

	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
		return;

	mtu = READ_ONCE(tcp_sk(sk)->mtu_info);

	/* Drop requests trying to increase our current mss.
	 * Check done in __ip6_rt_update_pmtu() is too late.
	 */
	if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
		return;

	dst = inet6_csk_update_pmtu(sk, mtu);
	if (!dst)
		return;

	if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
		tcp_sync_mss(sk, dst_mtu(dst));
		tcp_simple_retransmit(sk);
	}
}

static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		      u8 type, u8 code, int offset, __be32 info)
{
	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
	struct net *net = dev_net_rcu(skb->dev);
	struct request_sock *fastopen;
	struct ipv6_pinfo *np;
	struct tcp_sock *tp;
	__u32 seq, snd_una;
	struct sock *sk;
	bool fatal;
	int err;

	sk = __inet6_lookup_established(net, &hdr->daddr, th->dest,
					&hdr->saddr, ntohs(th->source),
					skb->dev->ifindex, inet6_sdif(skb));

	if (!sk) {
		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
				  ICMP6_MIB_INERRORS);
		return -ENOENT;
	}

	if (sk->sk_state == TCP_TIME_WAIT) {
		/* To increase the counter of ignored icmps for TCP-AO */
		tcp_ao_ignore_icmp(sk, AF_INET6, type, code);
		inet_twsk_put(inet_twsk(sk));
		return 0;
	}
	seq = ntohl(th->seq);
	fatal = icmpv6_err_convert(type, code, &err);
	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		tcp_req_err(sk, seq, fatal);
		return 0;
	}

	if (tcp_ao_ignore_icmp(sk, AF_INET6, type, code)) {
		sock_put(sk);
		return 0;
	}

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (static_branch_unlikely(&ip6_min_hopcount)) {
		/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
		if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
			goto out;
		}
	}

	tp = tcp_sk(sk);
	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
	fastopen = rcu_dereference(tp->fastopen_rsk);
	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, snd_una, tp->snd_nxt)) {
		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = tcp_inet6_sk(sk);

	if (type == NDISC_REDIRECT) {
		if (!sock_owned_by_user(sk)) {
			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);

			if (dst)
				dst->ops->redirect(dst, sk, skb);
		}
		goto out;
	}

	if (type == ICMPV6_PKT_TOOBIG) {
		u32 mtu = ntohl(info);

		/* We are not interested in TCP_LISTEN and open_requests
		 * (SYN-ACKs sent out by Linux are always < 576 bytes so
		 * they should go through unfragmented).
		 */
		if (sk->sk_state == TCP_LISTEN)
			goto out;

		if (!ip6_sk_accept_pmtu(sk))
			goto out;

		if (mtu < IPV6_MIN_MTU)
			goto out;

		WRITE_ONCE(tp->mtu_info, mtu);

		if (!sock_owned_by_user(sk))
			tcp_v6_mtu_reduced(sk);
		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
					   &sk->sk_tsq_flags))
			sock_hold(sk);
		goto out;
	}


	/* Might be for a request_sock */
	switch (sk->sk_state) {
	case TCP_SYN_SENT:
	case TCP_SYN_RECV:
		/* Only in fast or simultaneous open. If a fast open socket is
		 * already accepted it is treated as a connected one below.
490 */ 491 if (fastopen && !fastopen->sk) 492 break; 493 494 ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th); 495 496 if (!sock_owned_by_user(sk)) 497 tcp_done_with_error(sk, err); 498 else 499 WRITE_ONCE(sk->sk_err_soft, err); 500 goto out; 501 case TCP_LISTEN: 502 break; 503 default: 504 /* check if this ICMP message allows revert of backoff. 505 * (see RFC 6069) 506 */ 507 if (!fastopen && type == ICMPV6_DEST_UNREACH && 508 code == ICMPV6_NOROUTE) 509 tcp_ld_RTO_revert(sk, seq); 510 } 511 512 if (!sock_owned_by_user(sk) && inet6_test_bit(RECVERR6, sk)) { 513 WRITE_ONCE(sk->sk_err, err); 514 sk_error_report(sk); 515 } else { 516 WRITE_ONCE(sk->sk_err_soft, err); 517 } 518 out: 519 bh_unlock_sock(sk); 520 sock_put(sk); 521 return 0; 522 } 523 524 525 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, 526 struct flowi *fl, 527 struct request_sock *req, 528 struct tcp_fastopen_cookie *foc, 529 enum tcp_synack_type synack_type, 530 struct sk_buff *syn_skb) 531 { 532 struct inet_request_sock *ireq = inet_rsk(req); 533 const struct ipv6_pinfo *np = tcp_inet6_sk(sk); 534 struct ipv6_txoptions *opt; 535 struct flowi6 *fl6 = &fl->u.ip6; 536 struct sk_buff *skb; 537 int err = -ENOMEM; 538 u8 tclass; 539 540 /* First, grab a route. */ 541 if (!dst && (dst = inet6_csk_route_req(sk, fl6, req, 542 IPPROTO_TCP)) == NULL) 543 goto done; 544 545 skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb); 546 547 if (skb) { 548 tcp_rsk(req)->syn_ect_snt = np->tclass & INET_ECN_MASK; 549 __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr, 550 &ireq->ir_v6_rmt_addr); 551 552 fl6->daddr = ireq->ir_v6_rmt_addr; 553 if (inet6_test_bit(REPFLOW, sk) && ireq->pktopts) 554 fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts)); 555 556 tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ? 557 (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) | 558 (np->tclass & INET_ECN_MASK) : 559 np->tclass; 560 561 if (!INET_ECN_is_capable(tclass) && 562 tcp_bpf_ca_needs_ecn((struct sock *)req)) 563 tclass |= INET_ECN_ECT_0; 564 565 rcu_read_lock(); 566 opt = ireq->ipv6_opt; 567 if (!opt) 568 opt = rcu_dereference(np->opt); 569 err = ip6_xmit(sk, skb, fl6, skb->mark ? 
: READ_ONCE(sk->sk_mark), 570 opt, tclass, READ_ONCE(sk->sk_priority)); 571 rcu_read_unlock(); 572 err = net_xmit_eval(err); 573 } 574 575 done: 576 return err; 577 } 578 579 580 static void tcp_v6_reqsk_destructor(struct request_sock *req) 581 { 582 kfree(inet_rsk(req)->ipv6_opt); 583 consume_skb(inet_rsk(req)->pktopts); 584 } 585 586 #ifdef CONFIG_TCP_MD5SIG 587 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk, 588 const struct in6_addr *addr, 589 int l3index) 590 { 591 return tcp_md5_do_lookup(sk, l3index, 592 (union tcp_md5_addr *)addr, AF_INET6); 593 } 594 595 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk, 596 const struct sock *addr_sk) 597 { 598 int l3index; 599 600 l3index = l3mdev_master_ifindex_by_index(sock_net(sk), 601 addr_sk->sk_bound_dev_if); 602 return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr, 603 l3index); 604 } 605 606 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname, 607 sockptr_t optval, int optlen) 608 { 609 struct tcp_md5sig cmd; 610 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr; 611 union tcp_ao_addr *addr; 612 int l3index = 0; 613 u8 prefixlen; 614 bool l3flag; 615 u8 flags; 616 617 if (optlen < sizeof(cmd)) 618 return -EINVAL; 619 620 if (copy_from_sockptr(&cmd, optval, sizeof(cmd))) 621 return -EFAULT; 622 623 if (sin6->sin6_family != AF_INET6) 624 return -EINVAL; 625 626 flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX; 627 l3flag = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX; 628 629 if (optname == TCP_MD5SIG_EXT && 630 cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) { 631 prefixlen = cmd.tcpm_prefixlen; 632 if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) && 633 prefixlen > 32)) 634 return -EINVAL; 635 } else { 636 prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128; 637 } 638 639 if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex && 640 cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) { 641 struct net_device *dev; 642 643 rcu_read_lock(); 644 dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex); 645 if (dev && netif_is_l3_master(dev)) 646 l3index = dev->ifindex; 647 rcu_read_unlock(); 648 649 /* ok to reference set/not set outside of rcu; 650 * right now device MUST be an L3 master 651 */ 652 if (!dev || !l3index) 653 return -EINVAL; 654 } 655 656 if (!cmd.tcpm_keylen) { 657 if (ipv6_addr_v4mapped(&sin6->sin6_addr)) 658 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3], 659 AF_INET, prefixlen, 660 l3index, flags); 661 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr, 662 AF_INET6, prefixlen, l3index, flags); 663 } 664 665 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN) 666 return -EINVAL; 667 668 if (ipv6_addr_v4mapped(&sin6->sin6_addr)) { 669 addr = (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3]; 670 671 /* Don't allow keys for peers that have a matching TCP-AO key. 672 * See the comment in tcp_ao_add_cmd() 673 */ 674 if (tcp_ao_required(sk, addr, AF_INET, 675 l3flag ? l3index : -1, false)) 676 return -EKEYREJECTED; 677 return tcp_md5_do_add(sk, addr, 678 AF_INET, prefixlen, l3index, flags, 679 cmd.tcpm_key, cmd.tcpm_keylen); 680 } 681 682 addr = (union tcp_md5_addr *)&sin6->sin6_addr; 683 684 /* Don't allow keys for peers that have a matching TCP-AO key. 685 * See the comment in tcp_ao_add_cmd() 686 */ 687 if (tcp_ao_required(sk, addr, AF_INET6, l3flag ? 
l3index : -1, false)) 688 return -EKEYREJECTED; 689 690 return tcp_md5_do_add(sk, addr, AF_INET6, prefixlen, l3index, flags, 691 cmd.tcpm_key, cmd.tcpm_keylen); 692 } 693 694 static int tcp_v6_md5_hash_headers(struct tcp_sigpool *hp, 695 const struct in6_addr *daddr, 696 const struct in6_addr *saddr, 697 const struct tcphdr *th, int nbytes) 698 { 699 struct tcp6_pseudohdr *bp; 700 struct scatterlist sg; 701 struct tcphdr *_th; 702 703 bp = hp->scratch; 704 /* 1. TCP pseudo-header (RFC2460) */ 705 bp->saddr = *saddr; 706 bp->daddr = *daddr; 707 bp->protocol = cpu_to_be32(IPPROTO_TCP); 708 bp->len = cpu_to_be32(nbytes); 709 710 _th = (struct tcphdr *)(bp + 1); 711 memcpy(_th, th, sizeof(*th)); 712 _th->check = 0; 713 714 sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th)); 715 ahash_request_set_crypt(hp->req, &sg, NULL, 716 sizeof(*bp) + sizeof(*th)); 717 return crypto_ahash_update(hp->req); 718 } 719 720 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, 721 const struct in6_addr *daddr, struct in6_addr *saddr, 722 const struct tcphdr *th) 723 { 724 struct tcp_sigpool hp; 725 726 if (tcp_sigpool_start(tcp_md5_sigpool_id, &hp)) 727 goto clear_hash_nostart; 728 729 if (crypto_ahash_init(hp.req)) 730 goto clear_hash; 731 if (tcp_v6_md5_hash_headers(&hp, daddr, saddr, th, th->doff << 2)) 732 goto clear_hash; 733 if (tcp_md5_hash_key(&hp, key)) 734 goto clear_hash; 735 ahash_request_set_crypt(hp.req, NULL, md5_hash, 0); 736 if (crypto_ahash_final(hp.req)) 737 goto clear_hash; 738 739 tcp_sigpool_end(&hp); 740 return 0; 741 742 clear_hash: 743 tcp_sigpool_end(&hp); 744 clear_hash_nostart: 745 memset(md5_hash, 0, 16); 746 return 1; 747 } 748 749 static int tcp_v6_md5_hash_skb(char *md5_hash, 750 const struct tcp_md5sig_key *key, 751 const struct sock *sk, 752 const struct sk_buff *skb) 753 { 754 const struct tcphdr *th = tcp_hdr(skb); 755 const struct in6_addr *saddr, *daddr; 756 struct tcp_sigpool hp; 757 758 if (sk) { /* valid for establish/request sockets */ 759 saddr = &sk->sk_v6_rcv_saddr; 760 daddr = &sk->sk_v6_daddr; 761 } else { 762 const struct ipv6hdr *ip6h = ipv6_hdr(skb); 763 saddr = &ip6h->saddr; 764 daddr = &ip6h->daddr; 765 } 766 767 if (tcp_sigpool_start(tcp_md5_sigpool_id, &hp)) 768 goto clear_hash_nostart; 769 770 if (crypto_ahash_init(hp.req)) 771 goto clear_hash; 772 773 if (tcp_v6_md5_hash_headers(&hp, daddr, saddr, th, skb->len)) 774 goto clear_hash; 775 if (tcp_sigpool_hash_skb_data(&hp, skb, th->doff << 2)) 776 goto clear_hash; 777 if (tcp_md5_hash_key(&hp, key)) 778 goto clear_hash; 779 ahash_request_set_crypt(hp.req, NULL, md5_hash, 0); 780 if (crypto_ahash_final(hp.req)) 781 goto clear_hash; 782 783 tcp_sigpool_end(&hp); 784 return 0; 785 786 clear_hash: 787 tcp_sigpool_end(&hp); 788 clear_hash_nostart: 789 memset(md5_hash, 0, 16); 790 return 1; 791 } 792 #endif 793 794 static void tcp_v6_init_req(struct request_sock *req, 795 const struct sock *sk_listener, 796 struct sk_buff *skb, 797 u32 tw_isn) 798 { 799 bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags); 800 struct inet_request_sock *ireq = inet_rsk(req); 801 const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener); 802 803 ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; 804 ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; 805 ireq->ir_rmt_addr = LOOPBACK4_IPV6; 806 ireq->ir_loc_addr = LOOPBACK4_IPV6; 807 808 /* So that link locals have meaning */ 809 if ((!sk_listener->sk_bound_dev_if || l3_slave) && 810 ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) 811 ireq->ir_iif = 
tcp_v6_iif(skb); 812 813 if (!tw_isn && 814 (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) || 815 np->rxopt.bits.rxinfo || 816 np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || 817 np->rxopt.bits.rxohlim || inet6_test_bit(REPFLOW, sk_listener))) { 818 refcount_inc(&skb->users); 819 ireq->pktopts = skb; 820 } 821 } 822 823 static struct dst_entry *tcp_v6_route_req(const struct sock *sk, 824 struct sk_buff *skb, 825 struct flowi *fl, 826 struct request_sock *req, 827 u32 tw_isn) 828 { 829 tcp_v6_init_req(req, sk, skb, tw_isn); 830 831 if (security_inet_conn_request(sk, skb, req)) 832 return NULL; 833 834 return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP); 835 } 836 837 struct request_sock_ops tcp6_request_sock_ops __read_mostly = { 838 .family = AF_INET6, 839 .obj_size = sizeof(struct tcp6_request_sock), 840 .send_ack = tcp_v6_reqsk_send_ack, 841 .destructor = tcp_v6_reqsk_destructor, 842 .send_reset = tcp_v6_send_reset, 843 .syn_ack_timeout = tcp_syn_ack_timeout, 844 }; 845 846 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { 847 .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - 848 sizeof(struct ipv6hdr), 849 #ifdef CONFIG_TCP_MD5SIG 850 .req_md5_lookup = tcp_v6_md5_lookup, 851 .calc_md5_hash = tcp_v6_md5_hash_skb, 852 #endif 853 #ifdef CONFIG_TCP_AO 854 .ao_lookup = tcp_v6_ao_lookup_rsk, 855 .ao_calc_key = tcp_v6_ao_calc_key_rsk, 856 .ao_synack_hash = tcp_v6_ao_synack_hash, 857 #endif 858 #ifdef CONFIG_SYN_COOKIES 859 .cookie_init_seq = cookie_v6_init_sequence, 860 #endif 861 .route_req = tcp_v6_route_req, 862 .init_seq = tcp_v6_init_seq, 863 .init_ts_off = tcp_v6_init_ts_off, 864 .send_synack = tcp_v6_send_synack, 865 }; 866 867 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq, 868 u32 ack, u32 win, u32 tsval, u32 tsecr, 869 int oif, int rst, u8 tclass, __be32 label, 870 u32 priority, u32 txhash, struct tcp_key *key) 871 { 872 struct net *net = sk ? sock_net(sk) : skb_dst_dev_net_rcu(skb); 873 unsigned int tot_len = sizeof(struct tcphdr); 874 struct sock *ctl_sk = net->ipv6.tcp_sk; 875 const struct tcphdr *th = tcp_hdr(skb); 876 __be32 mrst = 0, *topt; 877 struct dst_entry *dst; 878 struct sk_buff *buff; 879 struct tcphdr *t1; 880 struct flowi6 fl6; 881 u32 mark = 0; 882 883 if (tsecr) 884 tot_len += TCPOLEN_TSTAMP_ALIGNED; 885 if (tcp_key_is_md5(key)) 886 tot_len += TCPOLEN_MD5SIG_ALIGNED; 887 if (tcp_key_is_ao(key)) 888 tot_len += tcp_ao_len_aligned(key->ao_key); 889 890 #ifdef CONFIG_MPTCP 891 if (rst && !tcp_key_is_md5(key)) { 892 mrst = mptcp_reset_option(skb); 893 894 if (mrst) 895 tot_len += sizeof(__be32); 896 } 897 #endif 898 899 buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC); 900 if (!buff) 901 return; 902 903 skb_reserve(buff, MAX_TCP_HEADER); 904 905 t1 = skb_push(buff, tot_len); 906 skb_reset_transport_header(buff); 907 908 /* Swap the send and the receive. 
*/ 909 memset(t1, 0, sizeof(*t1)); 910 t1->dest = th->source; 911 t1->source = th->dest; 912 t1->doff = tot_len / 4; 913 t1->seq = htonl(seq); 914 t1->ack_seq = htonl(ack); 915 t1->ack = !rst || !th->ack; 916 t1->rst = rst; 917 t1->window = htons(win); 918 919 topt = (__be32 *)(t1 + 1); 920 921 if (tsecr) { 922 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | 923 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); 924 *topt++ = htonl(tsval); 925 *topt++ = htonl(tsecr); 926 } 927 928 if (mrst) 929 *topt++ = mrst; 930 931 #ifdef CONFIG_TCP_MD5SIG 932 if (tcp_key_is_md5(key)) { 933 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | 934 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG); 935 tcp_v6_md5_hash_hdr((__u8 *)topt, key->md5_key, 936 &ipv6_hdr(skb)->saddr, 937 &ipv6_hdr(skb)->daddr, t1); 938 } 939 #endif 940 #ifdef CONFIG_TCP_AO 941 if (tcp_key_is_ao(key)) { 942 *topt++ = htonl((TCPOPT_AO << 24) | 943 (tcp_ao_len(key->ao_key) << 16) | 944 (key->ao_key->sndid << 8) | 945 (key->rcv_next)); 946 947 tcp_ao_hash_hdr(AF_INET6, (char *)topt, key->ao_key, 948 key->traffic_key, 949 (union tcp_ao_addr *)&ipv6_hdr(skb)->saddr, 950 (union tcp_ao_addr *)&ipv6_hdr(skb)->daddr, 951 t1, key->sne); 952 } 953 #endif 954 955 memset(&fl6, 0, sizeof(fl6)); 956 fl6.daddr = ipv6_hdr(skb)->saddr; 957 fl6.saddr = ipv6_hdr(skb)->daddr; 958 fl6.flowlabel = label; 959 960 buff->ip_summed = CHECKSUM_PARTIAL; 961 962 __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr); 963 964 fl6.flowi6_proto = IPPROTO_TCP; 965 if (rt6_need_strict(&fl6.daddr) && !oif) 966 fl6.flowi6_oif = tcp_v6_iif(skb); 967 else { 968 if (!oif && netif_index_is_l3_master(net, skb->skb_iif)) 969 oif = skb->skb_iif; 970 971 fl6.flowi6_oif = oif; 972 } 973 974 if (sk) { 975 /* unconstify the socket only to attach it to buff with care. */ 976 skb_set_owner_edemux(buff, (struct sock *)sk); 977 psp_reply_set_decrypted(buff); 978 979 if (sk->sk_state == TCP_TIME_WAIT) 980 mark = inet_twsk(sk)->tw_mark; 981 else 982 mark = READ_ONCE(sk->sk_mark); 983 skb_set_delivery_time(buff, tcp_transmit_time(sk), SKB_CLOCK_MONOTONIC); 984 } 985 if (txhash) { 986 /* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */ 987 skb_set_hash(buff, txhash, PKT_HASH_TYPE_L4); 988 } 989 fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark; 990 fl6.fl6_dport = t1->dest; 991 fl6.fl6_sport = t1->source; 992 fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? 
sk : NULL); 993 security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6)); 994 995 /* Pass a socket to ip6_dst_lookup either it is for RST 996 * Underlying function will use this to retrieve the network 997 * namespace 998 */ 999 if (sk && sk->sk_state != TCP_TIME_WAIT) 1000 dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /*sk's xfrm_policy can be referred*/ 1001 else 1002 dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL); 1003 if (!IS_ERR(dst)) { 1004 skb_dst_set(buff, dst); 1005 ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, 1006 tclass, priority); 1007 TCP_INC_STATS(net, TCP_MIB_OUTSEGS); 1008 if (rst) 1009 TCP_INC_STATS(net, TCP_MIB_OUTRSTS); 1010 return; 1011 } 1012 1013 kfree_skb(buff); 1014 } 1015 1016 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb, 1017 enum sk_rst_reason reason) 1018 { 1019 const struct tcphdr *th = tcp_hdr(skb); 1020 struct ipv6hdr *ipv6h = ipv6_hdr(skb); 1021 const __u8 *md5_hash_location = NULL; 1022 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) 1023 bool allocated_traffic_key = false; 1024 #endif 1025 const struct tcp_ao_hdr *aoh; 1026 struct tcp_key key = {}; 1027 u32 seq = 0, ack_seq = 0; 1028 __be32 label = 0; 1029 u32 priority = 0; 1030 struct net *net; 1031 u32 txhash = 0; 1032 int oif = 0; 1033 #ifdef CONFIG_TCP_MD5SIG 1034 unsigned char newhash[16]; 1035 int genhash; 1036 struct sock *sk1 = NULL; 1037 #endif 1038 1039 if (th->rst) 1040 return; 1041 1042 /* If sk not NULL, it means we did a successful lookup and incoming 1043 * route had to be correct. prequeue might have dropped our dst. 1044 */ 1045 if (!sk && !ipv6_unicast_destination(skb)) 1046 return; 1047 1048 net = sk ? sock_net(sk) : skb_dst_dev_net_rcu(skb); 1049 /* Invalid TCP option size or twice included auth */ 1050 if (tcp_parse_auth_options(th, &md5_hash_location, &aoh)) 1051 return; 1052 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) 1053 rcu_read_lock(); 1054 #endif 1055 #ifdef CONFIG_TCP_MD5SIG 1056 if (sk && sk_fullsock(sk)) { 1057 int l3index; 1058 1059 /* sdif set, means packet ingressed via a device 1060 * in an L3 domain and inet_iif is set to it. 1061 */ 1062 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; 1063 key.md5_key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index); 1064 if (key.md5_key) 1065 key.type = TCP_KEY_MD5; 1066 } else if (md5_hash_location) { 1067 int dif = tcp_v6_iif_l3_slave(skb); 1068 int sdif = tcp_v6_sdif(skb); 1069 int l3index; 1070 1071 /* 1072 * active side is lost. Try to find listening socket through 1073 * source port, and then find md5 key through listening socket. 1074 * we are not loose security here: 1075 * Incoming packet is checked with md5 hash with finding key, 1076 * no RST generated if md5 hash doesn't match. 1077 */ 1078 sk1 = inet6_lookup_listener(net, NULL, 0, &ipv6h->saddr, th->source, 1079 &ipv6h->daddr, ntohs(th->source), 1080 dif, sdif); 1081 if (!sk1) 1082 goto out; 1083 1084 /* sdif set, means packet ingressed via a device 1085 * in an L3 domain and dif is set to it. 1086 */ 1087 l3index = tcp_v6_sdif(skb) ? 
dif : 0; 1088 1089 key.md5_key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index); 1090 if (!key.md5_key) 1091 goto out; 1092 key.type = TCP_KEY_MD5; 1093 1094 genhash = tcp_v6_md5_hash_skb(newhash, key.md5_key, NULL, skb); 1095 if (genhash || memcmp(md5_hash_location, newhash, 16) != 0) 1096 goto out; 1097 } 1098 #endif 1099 1100 if (th->ack) 1101 seq = ntohl(th->ack_seq); 1102 else 1103 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len - 1104 (th->doff << 2); 1105 1106 #ifdef CONFIG_TCP_AO 1107 if (aoh) { 1108 int l3index; 1109 1110 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; 1111 if (tcp_ao_prepare_reset(sk, skb, aoh, l3index, seq, 1112 &key.ao_key, &key.traffic_key, 1113 &allocated_traffic_key, 1114 &key.rcv_next, &key.sne)) 1115 goto out; 1116 key.type = TCP_KEY_AO; 1117 } 1118 #endif 1119 1120 if (sk) { 1121 oif = sk->sk_bound_dev_if; 1122 if (sk_fullsock(sk)) { 1123 if (inet6_test_bit(REPFLOW, sk)) 1124 label = ip6_flowlabel(ipv6h); 1125 priority = READ_ONCE(sk->sk_priority); 1126 txhash = sk->sk_txhash; 1127 } 1128 if (sk->sk_state == TCP_TIME_WAIT) { 1129 label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel); 1130 priority = inet_twsk(sk)->tw_priority; 1131 txhash = inet_twsk(sk)->tw_txhash; 1132 } 1133 } else { 1134 if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET) 1135 label = ip6_flowlabel(ipv6h); 1136 } 1137 1138 trace_tcp_send_reset(sk, skb, reason); 1139 1140 tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, 1, 1141 ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK, 1142 label, priority, txhash, 1143 &key); 1144 1145 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) 1146 out: 1147 if (allocated_traffic_key) 1148 kfree(key.traffic_key); 1149 rcu_read_unlock(); 1150 #endif 1151 } 1152 1153 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq, 1154 u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, 1155 struct tcp_key *key, u8 tclass, 1156 __be32 label, u32 priority, u32 txhash) 1157 { 1158 tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, 0, 1159 tclass, label, priority, txhash, key); 1160 } 1161 1162 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb, 1163 enum tcp_tw_status tw_status) 1164 { 1165 struct inet_timewait_sock *tw = inet_twsk(sk); 1166 struct tcp_timewait_sock *tcptw = tcp_twsk(sk); 1167 u8 tclass = tw->tw_tclass; 1168 struct tcp_key key = {}; 1169 1170 if (tw_status == TCP_TW_ACK_OOW) 1171 tclass &= ~INET_ECN_MASK; 1172 #ifdef CONFIG_TCP_AO 1173 struct tcp_ao_info *ao_info; 1174 1175 if (static_branch_unlikely(&tcp_ao_needed.key)) { 1176 1177 /* FIXME: the segment to-be-acked is not verified yet */ 1178 ao_info = rcu_dereference(tcptw->ao_info); 1179 if (ao_info) { 1180 const struct tcp_ao_hdr *aoh; 1181 1182 /* Invalid TCP option size or twice included auth */ 1183 if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh)) 1184 goto out; 1185 if (aoh) 1186 key.ao_key = tcp_ao_established_key(sk, ao_info, 1187 aoh->rnext_keyid, -1); 1188 } 1189 } 1190 if (key.ao_key) { 1191 struct tcp_ao_key *rnext_key; 1192 1193 key.traffic_key = snd_other_key(key.ao_key); 1194 /* rcv_next switches to our rcv_next */ 1195 rnext_key = READ_ONCE(ao_info->rnext_key); 1196 key.rcv_next = rnext_key->rcvid; 1197 key.sne = READ_ONCE(ao_info->snd_sne); 1198 key.type = TCP_KEY_AO; 1199 #else 1200 if (0) { 1201 #endif 1202 #ifdef CONFIG_TCP_MD5SIG 1203 } else if (static_branch_unlikely(&tcp_md5_needed.key)) { 1204 key.md5_key = tcp_twsk_md5_key(tcptw); 1205 if (key.md5_key) 1206 key.type = 
TCP_KEY_MD5; 1207 #endif 1208 } 1209 1210 tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, 1211 READ_ONCE(tcptw->tw_rcv_nxt), 1212 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, 1213 tcp_tw_tsval(tcptw), 1214 READ_ONCE(tcptw->tw_ts_recent), tw->tw_bound_dev_if, 1215 &key, tclass, cpu_to_be32(tw->tw_flowlabel), 1216 tw->tw_priority, tw->tw_txhash); 1217 1218 #ifdef CONFIG_TCP_AO 1219 out: 1220 #endif 1221 inet_twsk_put(tw); 1222 } 1223 1224 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, 1225 struct request_sock *req) 1226 { 1227 struct tcp_key key = {}; 1228 1229 #ifdef CONFIG_TCP_AO 1230 if (static_branch_unlikely(&tcp_ao_needed.key) && 1231 tcp_rsk_used_ao(req)) { 1232 const struct in6_addr *addr = &ipv6_hdr(skb)->saddr; 1233 const struct tcp_ao_hdr *aoh; 1234 int l3index; 1235 1236 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; 1237 /* Invalid TCP option size or twice included auth */ 1238 if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh)) 1239 return; 1240 if (!aoh) 1241 return; 1242 key.ao_key = tcp_ao_do_lookup(sk, l3index, 1243 (union tcp_ao_addr *)addr, 1244 AF_INET6, aoh->rnext_keyid, -1); 1245 if (unlikely(!key.ao_key)) { 1246 /* Send ACK with any matching MKT for the peer */ 1247 key.ao_key = tcp_ao_do_lookup(sk, l3index, 1248 (union tcp_ao_addr *)addr, 1249 AF_INET6, -1, -1); 1250 /* Matching key disappeared (user removed the key?) 1251 * let the handshake timeout. 1252 */ 1253 if (!key.ao_key) { 1254 net_info_ratelimited("TCP-AO key for (%pI6, %d)->(%pI6, %d) suddenly disappeared, won't ACK new connection\n", 1255 addr, 1256 ntohs(tcp_hdr(skb)->source), 1257 &ipv6_hdr(skb)->daddr, 1258 ntohs(tcp_hdr(skb)->dest)); 1259 return; 1260 } 1261 } 1262 key.traffic_key = kmalloc(tcp_ao_digest_size(key.ao_key), GFP_ATOMIC); 1263 if (!key.traffic_key) 1264 return; 1265 1266 key.type = TCP_KEY_AO; 1267 key.rcv_next = aoh->keyid; 1268 tcp_v6_ao_calc_key_rsk(key.ao_key, key.traffic_key, req); 1269 #else 1270 if (0) { 1271 #endif 1272 #ifdef CONFIG_TCP_MD5SIG 1273 } else if (static_branch_unlikely(&tcp_md5_needed.key)) { 1274 int l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; 1275 1276 key.md5_key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, 1277 l3index); 1278 if (key.md5_key) 1279 key.type = TCP_KEY_MD5; 1280 #endif 1281 } 1282 1283 /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV 1284 * sk->sk_state == TCP_SYN_RECV -> for Fast Open. 1285 */ 1286 tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ? 
1287 tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, 1288 tcp_rsk(req)->rcv_nxt, 1289 tcp_synack_window(req) >> inet_rsk(req)->rcv_wscale, 1290 tcp_rsk_tsval(tcp_rsk(req)), 1291 req->ts_recent, sk->sk_bound_dev_if, 1292 &key, ipv6_get_dsfield(ipv6_hdr(skb)) & ~INET_ECN_MASK, 1293 0, 1294 READ_ONCE(sk->sk_priority), 1295 READ_ONCE(tcp_rsk(req)->txhash)); 1296 if (tcp_key_is_ao(&key)) 1297 kfree(key.traffic_key); 1298 } 1299 1300 1301 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb) 1302 { 1303 #ifdef CONFIG_SYN_COOKIES 1304 const struct tcphdr *th = tcp_hdr(skb); 1305 1306 if (!th->syn) 1307 sk = cookie_v6_check(sk, skb); 1308 #endif 1309 return sk; 1310 } 1311 1312 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph, 1313 struct tcphdr *th, u32 *cookie) 1314 { 1315 u16 mss = 0; 1316 #ifdef CONFIG_SYN_COOKIES 1317 mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops, 1318 &tcp_request_sock_ipv6_ops, sk, th); 1319 if (mss) { 1320 *cookie = __cookie_v6_init_sequence(iph, th, &mss); 1321 tcp_synq_overflow(sk); 1322 } 1323 #endif 1324 return mss; 1325 } 1326 1327 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) 1328 { 1329 if (skb->protocol == htons(ETH_P_IP)) 1330 return tcp_v4_conn_request(sk, skb); 1331 1332 if (!ipv6_unicast_destination(skb)) 1333 goto drop; 1334 1335 if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) { 1336 __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS); 1337 return 0; 1338 } 1339 1340 return tcp_conn_request(&tcp6_request_sock_ops, 1341 &tcp_request_sock_ipv6_ops, sk, skb); 1342 1343 drop: 1344 tcp_listendrop(sk); 1345 return 0; /* don't send reset */ 1346 } 1347 1348 static void tcp_v6_restore_cb(struct sk_buff *skb) 1349 { 1350 /* We need to move header back to the beginning if xfrm6_policy_check() 1351 * and tcp_v6_fill_cb() are going to be called again. 1352 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there. 
1353 */ 1354 memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6, 1355 sizeof(struct inet6_skb_parm)); 1356 } 1357 1358 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, 1359 struct request_sock *req, 1360 struct dst_entry *dst, 1361 struct request_sock *req_unhash, 1362 bool *own_req) 1363 { 1364 struct inet_request_sock *ireq; 1365 struct ipv6_pinfo *newnp; 1366 const struct ipv6_pinfo *np = tcp_inet6_sk(sk); 1367 struct ipv6_txoptions *opt; 1368 struct inet_sock *newinet; 1369 bool found_dup_sk = false; 1370 struct tcp_sock *newtp; 1371 struct sock *newsk; 1372 #ifdef CONFIG_TCP_MD5SIG 1373 struct tcp_md5sig_key *key; 1374 int l3index; 1375 #endif 1376 struct flowi6 fl6; 1377 1378 if (skb->protocol == htons(ETH_P_IP)) { 1379 /* 1380 * v6 mapped 1381 */ 1382 1383 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst, 1384 req_unhash, own_req); 1385 1386 if (!newsk) 1387 return NULL; 1388 1389 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk); 1390 1391 newnp = tcp_inet6_sk(newsk); 1392 newtp = tcp_sk(newsk); 1393 1394 memcpy(newnp, np, sizeof(struct ipv6_pinfo)); 1395 1396 newnp->saddr = newsk->sk_v6_rcv_saddr; 1397 1398 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped; 1399 if (sk_is_mptcp(newsk)) 1400 mptcpv6_handle_mapped(newsk, true); 1401 newsk->sk_backlog_rcv = tcp_v4_do_rcv; 1402 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) 1403 newtp->af_specific = &tcp_sock_ipv6_mapped_specific; 1404 #endif 1405 1406 newnp->ipv6_mc_list = NULL; 1407 newnp->ipv6_ac_list = NULL; 1408 newnp->ipv6_fl_list = NULL; 1409 newnp->pktoptions = NULL; 1410 newnp->opt = NULL; 1411 newnp->mcast_oif = inet_iif(skb); 1412 newnp->mcast_hops = ip_hdr(skb)->ttl; 1413 newnp->rcv_flowinfo = 0; 1414 if (inet6_test_bit(REPFLOW, sk)) 1415 newnp->flow_label = 0; 1416 1417 /* 1418 * No need to charge this sock to the relevant IPv6 refcnt debug socks count 1419 * here, tcp_create_openreq_child now does this for us, see the comment in 1420 * that function for the gory details. -acme 1421 */ 1422 1423 /* It is tricky place. Until this moment IPv4 tcp 1424 worked with IPv6 icsk.icsk_af_ops. 1425 Sync it now. 1426 */ 1427 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie); 1428 1429 return newsk; 1430 } 1431 1432 ireq = inet_rsk(req); 1433 1434 if (sk_acceptq_is_full(sk)) 1435 goto exit_overflow; 1436 1437 if (!dst) { 1438 dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP); 1439 if (!dst) 1440 goto exit; 1441 } 1442 1443 newsk = tcp_create_openreq_child(sk, req, skb); 1444 if (!newsk) 1445 goto exit_nonewsk; 1446 1447 /* 1448 * No need to charge this sock to the relevant IPv6 refcnt debug socks 1449 * count here, tcp_create_openreq_child now does this for us, see the 1450 * comment in that function for the gory details. -acme 1451 */ 1452 1453 newsk->sk_gso_type = SKB_GSO_TCPV6; 1454 inet6_sk_rx_dst_set(newsk, skb); 1455 1456 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk); 1457 1458 newtp = tcp_sk(newsk); 1459 newinet = inet_sk(newsk); 1460 newnp = tcp_inet6_sk(newsk); 1461 1462 memcpy(newnp, np, sizeof(struct ipv6_pinfo)); 1463 1464 ip6_dst_store(newsk, dst, false, false); 1465 1466 newnp->saddr = ireq->ir_v6_loc_addr; 1467 1468 /* Now IPv6 options... 1469 1470 First: no IPv4 options. 
1471 */ 1472 newinet->inet_opt = NULL; 1473 newnp->ipv6_mc_list = NULL; 1474 newnp->ipv6_ac_list = NULL; 1475 newnp->ipv6_fl_list = NULL; 1476 1477 /* Clone RX bits */ 1478 newnp->rxopt.all = np->rxopt.all; 1479 1480 newnp->pktoptions = NULL; 1481 newnp->opt = NULL; 1482 newnp->mcast_oif = tcp_v6_iif(skb); 1483 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; 1484 newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb)); 1485 if (inet6_test_bit(REPFLOW, sk)) 1486 newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb)); 1487 1488 /* Set ToS of the new socket based upon the value of incoming SYN. 1489 * ECT bits are set later in tcp_init_transfer(). 1490 */ 1491 if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)) 1492 newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK; 1493 1494 /* Clone native IPv6 options from listening socket (if any) 1495 1496 Yes, keeping reference count would be much more clever, 1497 but we make one more one thing there: reattach optmem 1498 to newsk. 1499 */ 1500 opt = ireq->ipv6_opt; 1501 if (!opt) 1502 opt = rcu_dereference(np->opt); 1503 if (opt) { 1504 opt = ipv6_dup_options(newsk, opt); 1505 RCU_INIT_POINTER(newnp->opt, opt); 1506 } 1507 inet_csk(newsk)->icsk_ext_hdr_len = 0; 1508 if (opt) 1509 inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen + 1510 opt->opt_flen; 1511 1512 tcp_ca_openreq_child(newsk, dst); 1513 1514 tcp_sync_mss(newsk, dst_mtu(dst)); 1515 newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst)); 1516 1517 tcp_initialize_rcv_mss(newsk); 1518 1519 #ifdef CONFIG_TCP_MD5SIG 1520 l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif); 1521 1522 if (!tcp_rsk_used_ao(req)) { 1523 /* Copy over the MD5 key from the original socket */ 1524 key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index); 1525 if (key) { 1526 const union tcp_md5_addr *addr; 1527 1528 addr = (union tcp_md5_addr *)&newsk->sk_v6_daddr; 1529 if (tcp_md5_key_copy(newsk, addr, AF_INET6, 128, l3index, key)) 1530 goto put_and_exit; 1531 } 1532 } 1533 #endif 1534 #ifdef CONFIG_TCP_AO 1535 /* Copy over tcp_ao_info if any */ 1536 if (tcp_ao_copy_all_matching(sk, newsk, req, skb, AF_INET6)) 1537 goto put_and_exit; /* OOM */ 1538 #endif 1539 1540 if (__inet_inherit_port(sk, newsk) < 0) 1541 goto put_and_exit; 1542 *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), 1543 &found_dup_sk); 1544 if (*own_req) { 1545 tcp_move_syn(newtp, req); 1546 1547 /* Clone pktoptions received with SYN, if we own the req */ 1548 if (ireq->pktopts) { 1549 newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk); 1550 consume_skb(ireq->pktopts); 1551 ireq->pktopts = NULL; 1552 if (newnp->pktoptions) 1553 tcp_v6_restore_cb(newnp->pktoptions); 1554 } 1555 } else { 1556 if (!req_unhash && found_dup_sk) { 1557 /* This code path should only be executed in the 1558 * syncookie case only 1559 */ 1560 bh_unlock_sock(newsk); 1561 sock_put(newsk); 1562 newsk = NULL; 1563 } 1564 } 1565 1566 return newsk; 1567 1568 exit_overflow: 1569 __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); 1570 exit_nonewsk: 1571 dst_release(dst); 1572 exit: 1573 tcp_listendrop(sk); 1574 return NULL; 1575 put_and_exit: 1576 inet_csk_prepare_forced_close(newsk); 1577 tcp_done(newsk); 1578 goto exit; 1579 } 1580 1581 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *, 1582 u32)); 1583 /* The socket must have it's spinlock held when we get 1584 * here, unless it is a TCP_LISTEN socket. 
1585 * 1586 * We have a potential double-lock case here, so even when 1587 * doing backlog processing we use the BH locking scheme. 1588 * This is because we cannot sleep with the original spinlock 1589 * held. 1590 */ 1591 INDIRECT_CALLABLE_SCOPE 1592 int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) 1593 { 1594 struct ipv6_pinfo *np = tcp_inet6_sk(sk); 1595 struct sk_buff *opt_skb = NULL; 1596 enum skb_drop_reason reason; 1597 struct tcp_sock *tp; 1598 1599 /* Imagine: socket is IPv6. IPv4 packet arrives, 1600 goes to IPv4 receive handler and backlogged. 1601 From backlog it always goes here. Kerboom... 1602 Fortunately, tcp_rcv_established and rcv_established 1603 handle them correctly, but it is not case with 1604 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK 1605 */ 1606 1607 if (skb->protocol == htons(ETH_P_IP)) 1608 return tcp_v4_do_rcv(sk, skb); 1609 1610 reason = psp_sk_rx_policy_check(sk, skb); 1611 if (reason) 1612 goto err_discard; 1613 1614 /* 1615 * socket locking is here for SMP purposes as backlog rcv 1616 * is currently called with bh processing disabled. 1617 */ 1618 1619 /* Do Stevens' IPV6_PKTOPTIONS. 1620 1621 Yes, guys, it is the only place in our code, where we 1622 may make it not affecting IPv4. 1623 The rest of code is protocol independent, 1624 and I do not like idea to uglify IPv4. 1625 1626 Actually, all the idea behind IPV6_PKTOPTIONS 1627 looks not very well thought. For now we latch 1628 options, received in the last packet, enqueued 1629 by tcp. Feel free to propose better solution. 1630 --ANK (980728) 1631 */ 1632 if (np->rxopt.all && sk->sk_state != TCP_LISTEN) 1633 opt_skb = skb_clone_and_charge_r(skb, sk); 1634 1635 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ 1636 struct dst_entry *dst; 1637 1638 dst = rcu_dereference_protected(sk->sk_rx_dst, 1639 lockdep_sock_is_held(sk)); 1640 1641 sock_rps_save_rxhash(sk, skb); 1642 sk_mark_napi_id(sk, skb); 1643 if (dst) { 1644 if (sk->sk_rx_dst_ifindex != skb->skb_iif || 1645 INDIRECT_CALL_1(dst->ops->check, ip6_dst_check, 1646 dst, sk->sk_rx_dst_cookie) == NULL) { 1647 RCU_INIT_POINTER(sk->sk_rx_dst, NULL); 1648 dst_release(dst); 1649 } 1650 } 1651 1652 tcp_rcv_established(sk, skb); 1653 if (opt_skb) 1654 goto ipv6_pktoptions; 1655 return 0; 1656 } 1657 1658 if (tcp_checksum_complete(skb)) 1659 goto csum_err; 1660 1661 if (sk->sk_state == TCP_LISTEN) { 1662 struct sock *nsk = tcp_v6_cookie_check(sk, skb); 1663 1664 if (nsk != sk) { 1665 if (nsk) { 1666 reason = tcp_child_process(sk, nsk, skb); 1667 if (reason) 1668 goto reset; 1669 } 1670 return 0; 1671 } 1672 } else 1673 sock_rps_save_rxhash(sk, skb); 1674 1675 reason = tcp_rcv_state_process(sk, skb); 1676 if (reason) 1677 goto reset; 1678 if (opt_skb) 1679 goto ipv6_pktoptions; 1680 return 0; 1681 1682 reset: 1683 tcp_v6_send_reset(sk, skb, sk_rst_convert_drop_reason(reason)); 1684 discard: 1685 if (opt_skb) 1686 __kfree_skb(opt_skb); 1687 sk_skb_reason_drop(sk, skb, reason); 1688 return 0; 1689 csum_err: 1690 reason = SKB_DROP_REASON_TCP_CSUM; 1691 trace_tcp_bad_csum(skb); 1692 TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS); 1693 err_discard: 1694 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); 1695 goto discard; 1696 1697 1698 ipv6_pktoptions: 1699 /* Do you ask, what is it? 1700 1701 1. skb was enqueued by tcp. 1702 2. skb is added to tail of read queue, rather than out of order. 1703 3. socket is not in passive state. 1704 4. Finally, it really contains options, which user wants to receive. 
1705 */ 1706 tp = tcp_sk(sk); 1707 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt && 1708 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) { 1709 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo) 1710 WRITE_ONCE(np->mcast_oif, tcp_v6_iif(opt_skb)); 1711 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) 1712 WRITE_ONCE(np->mcast_hops, 1713 ipv6_hdr(opt_skb)->hop_limit); 1714 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass) 1715 np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb)); 1716 if (inet6_test_bit(REPFLOW, sk)) 1717 np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb)); 1718 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) { 1719 tcp_v6_restore_cb(opt_skb); 1720 opt_skb = xchg(&np->pktoptions, opt_skb); 1721 } else { 1722 __kfree_skb(opt_skb); 1723 opt_skb = xchg(&np->pktoptions, NULL); 1724 } 1725 } 1726 1727 consume_skb(opt_skb); 1728 return 0; 1729 } 1730 1731 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr, 1732 const struct tcphdr *th) 1733 { 1734 /* This is tricky: we move IP6CB at its correct location into 1735 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because 1736 * _decode_session6() uses IP6CB(). 1737 * barrier() makes sure compiler won't play aliasing games. 1738 */ 1739 memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb), 1740 sizeof(struct inet6_skb_parm)); 1741 barrier(); 1742 1743 TCP_SKB_CB(skb)->seq = ntohl(th->seq); 1744 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + 1745 skb->len - th->doff*4); 1746 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); 1747 TCP_SKB_CB(skb)->tcp_flags = tcp_flags_ntohs(th); 1748 TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr); 1749 TCP_SKB_CB(skb)->sacked = 0; 1750 TCP_SKB_CB(skb)->has_rxtstamp = 1751 skb->tstamp || skb_hwtstamps(skb)->hwtstamp; 1752 } 1753 1754 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) 1755 { 1756 struct net *net = dev_net_rcu(skb->dev); 1757 enum skb_drop_reason drop_reason; 1758 enum tcp_tw_status tw_status; 1759 int sdif = inet6_sdif(skb); 1760 int dif = inet6_iif(skb); 1761 const struct tcphdr *th; 1762 const struct ipv6hdr *hdr; 1763 struct sock *sk = NULL; 1764 bool refcounted; 1765 int ret; 1766 u32 isn; 1767 1768 drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; 1769 if (skb->pkt_type != PACKET_HOST) 1770 goto discard_it; 1771 1772 /* 1773 * Count it even if it's bad. 
1774 */ 1775 __TCP_INC_STATS(net, TCP_MIB_INSEGS); 1776 1777 if (!pskb_may_pull(skb, sizeof(struct tcphdr))) 1778 goto discard_it; 1779 1780 th = (const struct tcphdr *)skb->data; 1781 1782 if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) { 1783 drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL; 1784 goto bad_packet; 1785 } 1786 if (!pskb_may_pull(skb, th->doff*4)) 1787 goto discard_it; 1788 1789 if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo)) 1790 goto csum_error; 1791 1792 th = (const struct tcphdr *)skb->data; 1793 hdr = ipv6_hdr(skb); 1794 1795 lookup: 1796 sk = __inet6_lookup_skb(skb, __tcp_hdrlen(th), 1797 th->source, th->dest, inet6_iif(skb), sdif, 1798 &refcounted); 1799 if (!sk) 1800 goto no_tcp_socket; 1801 1802 if (sk->sk_state == TCP_TIME_WAIT) 1803 goto do_time_wait; 1804 1805 if (sk->sk_state == TCP_NEW_SYN_RECV) { 1806 struct request_sock *req = inet_reqsk(sk); 1807 bool req_stolen = false; 1808 struct sock *nsk; 1809 1810 sk = req->rsk_listener; 1811 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) 1812 drop_reason = SKB_DROP_REASON_XFRM_POLICY; 1813 else 1814 drop_reason = tcp_inbound_hash(sk, req, skb, 1815 &hdr->saddr, &hdr->daddr, 1816 AF_INET6, dif, sdif); 1817 if (drop_reason) { 1818 sk_drops_skbadd(sk, skb); 1819 reqsk_put(req); 1820 goto discard_it; 1821 } 1822 if (tcp_checksum_complete(skb)) { 1823 reqsk_put(req); 1824 goto csum_error; 1825 } 1826 if (unlikely(sk->sk_state != TCP_LISTEN)) { 1827 nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb); 1828 if (!nsk) { 1829 inet_csk_reqsk_queue_drop_and_put(sk, req); 1830 goto lookup; 1831 } 1832 sk = nsk; 1833 /* reuseport_migrate_sock() has already held one sk_refcnt 1834 * before returning. 1835 */ 1836 } else { 1837 sock_hold(sk); 1838 } 1839 refcounted = true; 1840 nsk = NULL; 1841 if (!tcp_filter(sk, skb, &drop_reason)) { 1842 th = (const struct tcphdr *)skb->data; 1843 hdr = ipv6_hdr(skb); 1844 tcp_v6_fill_cb(skb, hdr, th); 1845 nsk = tcp_check_req(sk, skb, req, false, &req_stolen, 1846 &drop_reason); 1847 } 1848 if (!nsk) { 1849 reqsk_put(req); 1850 if (req_stolen) { 1851 /* Another cpu got exclusive access to req 1852 * and created a full blown socket. 1853 * Try to feed this packet to this socket 1854 * instead of discarding it. 
1855 */ 1856 tcp_v6_restore_cb(skb); 1857 sock_put(sk); 1858 goto lookup; 1859 } 1860 goto discard_and_relse; 1861 } 1862 nf_reset_ct(skb); 1863 if (nsk == sk) { 1864 reqsk_put(req); 1865 tcp_v6_restore_cb(skb); 1866 } else { 1867 drop_reason = tcp_child_process(sk, nsk, skb); 1868 if (drop_reason) { 1869 enum sk_rst_reason rst_reason; 1870 1871 rst_reason = sk_rst_convert_drop_reason(drop_reason); 1872 tcp_v6_send_reset(nsk, skb, rst_reason); 1873 goto discard_and_relse; 1874 } 1875 sock_put(sk); 1876 return 0; 1877 } 1878 } 1879 1880 process: 1881 if (static_branch_unlikely(&ip6_min_hopcount)) { 1882 /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */ 1883 if (unlikely(hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount))) { 1884 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); 1885 drop_reason = SKB_DROP_REASON_TCP_MINTTL; 1886 goto discard_and_relse; 1887 } 1888 } 1889 1890 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) { 1891 drop_reason = SKB_DROP_REASON_XFRM_POLICY; 1892 goto discard_and_relse; 1893 } 1894 1895 drop_reason = tcp_inbound_hash(sk, NULL, skb, &hdr->saddr, &hdr->daddr, 1896 AF_INET6, dif, sdif); 1897 if (drop_reason) 1898 goto discard_and_relse; 1899 1900 nf_reset_ct(skb); 1901 1902 if (tcp_filter(sk, skb, &drop_reason)) 1903 goto discard_and_relse; 1904 1905 th = (const struct tcphdr *)skb->data; 1906 hdr = ipv6_hdr(skb); 1907 tcp_v6_fill_cb(skb, hdr, th); 1908 1909 skb->dev = NULL; 1910 1911 if (sk->sk_state == TCP_LISTEN) { 1912 ret = tcp_v6_do_rcv(sk, skb); 1913 goto put_and_return; 1914 } 1915 1916 sk_incoming_cpu_update(sk); 1917 1918 bh_lock_sock_nested(sk); 1919 tcp_segs_in(tcp_sk(sk), skb); 1920 ret = 0; 1921 if (!sock_owned_by_user(sk)) { 1922 ret = tcp_v6_do_rcv(sk, skb); 1923 } else { 1924 if (tcp_add_backlog(sk, skb, &drop_reason)) 1925 goto discard_and_relse; 1926 } 1927 bh_unlock_sock(sk); 1928 put_and_return: 1929 if (refcounted) 1930 sock_put(sk); 1931 return ret ? 
-1 : 0; 1932 1933 no_tcp_socket: 1934 drop_reason = SKB_DROP_REASON_NO_SOCKET; 1935 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) 1936 goto discard_it; 1937 1938 tcp_v6_fill_cb(skb, hdr, th); 1939 1940 if (tcp_checksum_complete(skb)) { 1941 csum_error: 1942 drop_reason = SKB_DROP_REASON_TCP_CSUM; 1943 trace_tcp_bad_csum(skb); 1944 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS); 1945 bad_packet: 1946 __TCP_INC_STATS(net, TCP_MIB_INERRS); 1947 } else { 1948 tcp_v6_send_reset(NULL, skb, sk_rst_convert_drop_reason(drop_reason)); 1949 } 1950 1951 discard_it: 1952 SKB_DR_OR(drop_reason, NOT_SPECIFIED); 1953 sk_skb_reason_drop(sk, skb, drop_reason); 1954 return 0; 1955 1956 discard_and_relse: 1957 sk_drops_skbadd(sk, skb); 1958 if (refcounted) 1959 sock_put(sk); 1960 goto discard_it; 1961 1962 do_time_wait: 1963 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { 1964 drop_reason = SKB_DROP_REASON_XFRM_POLICY; 1965 inet_twsk_put(inet_twsk(sk)); 1966 goto discard_it; 1967 } 1968 1969 tcp_v6_fill_cb(skb, hdr, th); 1970 1971 if (tcp_checksum_complete(skb)) { 1972 inet_twsk_put(inet_twsk(sk)); 1973 goto csum_error; 1974 } 1975 1976 tw_status = tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn, 1977 &drop_reason); 1978 switch (tw_status) { 1979 case TCP_TW_SYN: 1980 { 1981 struct sock *sk2; 1982 1983 sk2 = inet6_lookup_listener(net, skb, __tcp_hdrlen(th), 1984 &ipv6_hdr(skb)->saddr, th->source, 1985 &ipv6_hdr(skb)->daddr, 1986 ntohs(th->dest), 1987 tcp_v6_iif_l3_slave(skb), 1988 sdif); 1989 if (sk2) { 1990 struct inet_timewait_sock *tw = inet_twsk(sk); 1991 inet_twsk_deschedule_put(tw); 1992 sk = sk2; 1993 tcp_v6_restore_cb(skb); 1994 refcounted = false; 1995 __this_cpu_write(tcp_tw_isn, isn); 1996 goto process; 1997 } 1998 1999 drop_reason = psp_twsk_rx_policy_check(inet_twsk(sk), skb); 2000 if (drop_reason) 2001 break; 2002 } 2003 /* to ACK */ 2004 fallthrough; 2005 case TCP_TW_ACK: 2006 case TCP_TW_ACK_OOW: 2007 tcp_v6_timewait_ack(sk, skb, tw_status); 2008 break; 2009 case TCP_TW_RST: 2010 tcp_v6_send_reset(sk, skb, SK_RST_REASON_TCP_TIMEWAIT_SOCKET); 2011 inet_twsk_deschedule_put(inet_twsk(sk)); 2012 goto discard_it; 2013 case TCP_TW_SUCCESS: 2014 ; 2015 } 2016 goto discard_it; 2017 } 2018 2019 void tcp_v6_early_demux(struct sk_buff *skb) 2020 { 2021 struct net *net = dev_net_rcu(skb->dev); 2022 const struct ipv6hdr *hdr; 2023 const struct tcphdr *th; 2024 struct sock *sk; 2025 2026 if (skb->pkt_type != PACKET_HOST) 2027 return; 2028 2029 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr))) 2030 return; 2031 2032 hdr = ipv6_hdr(skb); 2033 th = tcp_hdr(skb); 2034 2035 if (th->doff < sizeof(struct tcphdr) / 4) 2036 return; 2037 2038 /* Note : We use inet6_iif() here, not tcp_v6_iif() */ 2039 sk = __inet6_lookup_established(net, &hdr->saddr, th->source, 2040 &hdr->daddr, ntohs(th->dest), 2041 inet6_iif(skb), inet6_sdif(skb)); 2042 if (sk) { 2043 skb->sk = sk; 2044 skb->destructor = sock_edemux; 2045 if (sk_fullsock(sk)) { 2046 struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst); 2047 2048 if (dst) 2049 dst = dst_check(dst, sk->sk_rx_dst_cookie); 2050 if (dst && 2051 sk->sk_rx_dst_ifindex == skb->skb_iif) 2052 skb_dst_set_noref(skb, dst); 2053 } 2054 } 2055 } 2056 2057 static struct timewait_sock_ops tcp6_timewait_sock_ops = { 2058 .twsk_obj_size = sizeof(struct tcp6_timewait_sock), 2059 }; 2060 2061 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb) 2062 { 2063 __tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr); 2064 } 
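
/* Connection-level ops for native IPv6 TCP sockets.  Sockets talking to
 * v4-mapped peers are switched to &ipv6_mapped (defined further below) in
 * tcp_v6_connect() and tcp_v6_syn_recv_sock().
 */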
2065 2066 const struct inet_connection_sock_af_ops ipv6_specific = { 2067 .queue_xmit = inet6_csk_xmit, 2068 .send_check = tcp_v6_send_check, 2069 .rebuild_header = inet6_sk_rebuild_header, 2070 .sk_rx_dst_set = inet6_sk_rx_dst_set, 2071 .conn_request = tcp_v6_conn_request, 2072 .syn_recv_sock = tcp_v6_syn_recv_sock, 2073 .net_header_len = sizeof(struct ipv6hdr), 2074 .setsockopt = ipv6_setsockopt, 2075 .getsockopt = ipv6_getsockopt, 2076 .mtu_reduced = tcp_v6_mtu_reduced, 2077 }; 2078 2079 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) 2080 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = { 2081 #ifdef CONFIG_TCP_MD5SIG 2082 .md5_lookup = tcp_v6_md5_lookup, 2083 .calc_md5_hash = tcp_v6_md5_hash_skb, 2084 .md5_parse = tcp_v6_parse_md5_keys, 2085 #endif 2086 #ifdef CONFIG_TCP_AO 2087 .ao_lookup = tcp_v6_ao_lookup, 2088 .calc_ao_hash = tcp_v6_ao_hash_skb, 2089 .ao_parse = tcp_v6_parse_ao, 2090 .ao_calc_key_sk = tcp_v6_ao_calc_key_sk, 2091 #endif 2092 }; 2093 #endif 2094 2095 /* 2096 * TCP over IPv4 via INET6 API 2097 */ 2098 static const struct inet_connection_sock_af_ops ipv6_mapped = { 2099 .queue_xmit = ip_queue_xmit, 2100 .send_check = tcp_v4_send_check, 2101 .rebuild_header = inet_sk_rebuild_header, 2102 .sk_rx_dst_set = inet_sk_rx_dst_set, 2103 .conn_request = tcp_v6_conn_request, 2104 .syn_recv_sock = tcp_v6_syn_recv_sock, 2105 .net_header_len = sizeof(struct iphdr), 2106 .setsockopt = ipv6_setsockopt, 2107 .getsockopt = ipv6_getsockopt, 2108 .mtu_reduced = tcp_v4_mtu_reduced, 2109 }; 2110 2111 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) 2112 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = { 2113 #ifdef CONFIG_TCP_MD5SIG 2114 .md5_lookup = tcp_v4_md5_lookup, 2115 .calc_md5_hash = tcp_v4_md5_hash_skb, 2116 .md5_parse = tcp_v6_parse_md5_keys, 2117 #endif 2118 #ifdef CONFIG_TCP_AO 2119 .ao_lookup = tcp_v6_ao_lookup, 2120 .calc_ao_hash = tcp_v4_ao_hash_skb, 2121 .ao_parse = tcp_v6_parse_ao, 2122 .ao_calc_key_sk = tcp_v4_ao_calc_key_sk, 2123 #endif 2124 }; 2125 2126 static void tcp6_destruct_sock(struct sock *sk) 2127 { 2128 tcp_md5_destruct_sock(sk); 2129 tcp_ao_destroy_sock(sk, false); 2130 inet6_sock_destruct(sk); 2131 } 2132 #endif 2133 2134 /* NOTE: A lot of things set to zero explicitly by call to 2135 * sk_alloc() so need not be done here. 2136 */ 2137 static int tcp_v6_init_sock(struct sock *sk) 2138 { 2139 struct inet_connection_sock *icsk = inet_csk(sk); 2140 2141 tcp_init_sock(sk); 2142 2143 icsk->icsk_af_ops = &ipv6_specific; 2144 2145 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) 2146 tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific; 2147 sk->sk_destruct = tcp6_destruct_sock; 2148 #endif 2149 2150 return 0; 2151 } 2152 2153 #ifdef CONFIG_PROC_FS 2154 /* Proc filesystem TCPv6 sock list dumping. 
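 *
 * tcp6_seq_show() emits one header line ("sl local_address remote_address
 * st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode") and then
 * one row per socket: get_openreq6() for request sockets (reported in
 * SYN_RECV state), get_tcp6_sock() for full sockets and
 * get_timewait6_sock() for TIME_WAIT sockets.  The 128-bit addresses are
 * printed as four raw %08X words; the remaining columns broadly mirror
 * the IPv4 /proc/net/tcp format.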
*/ 2155 static void get_openreq6(struct seq_file *seq, 2156 const struct request_sock *req, int i) 2157 { 2158 long ttd = req->rsk_timer.expires - jiffies; 2159 const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr; 2160 const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr; 2161 2162 if (ttd < 0) 2163 ttd = 0; 2164 2165 seq_printf(seq, 2166 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " 2167 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n", 2168 i, 2169 src->s6_addr32[0], src->s6_addr32[1], 2170 src->s6_addr32[2], src->s6_addr32[3], 2171 inet_rsk(req)->ir_num, 2172 dest->s6_addr32[0], dest->s6_addr32[1], 2173 dest->s6_addr32[2], dest->s6_addr32[3], 2174 ntohs(inet_rsk(req)->ir_rmt_port), 2175 TCP_SYN_RECV, 2176 0, 0, /* could print option size, but that is af dependent. */ 2177 1, /* timers active (only the expire timer) */ 2178 jiffies_to_clock_t(ttd), 2179 req->num_timeout, 2180 from_kuid_munged(seq_user_ns(seq), 2181 sk_uid(req->rsk_listener)), 2182 0, /* non standard timer */ 2183 0, /* open_requests have no inode */ 2184 0, req); 2185 } 2186 2187 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) 2188 { 2189 const struct in6_addr *dest, *src; 2190 __u16 destp, srcp; 2191 int timer_active; 2192 unsigned long timer_expires; 2193 const struct inet_sock *inet = inet_sk(sp); 2194 const struct tcp_sock *tp = tcp_sk(sp); 2195 const struct inet_connection_sock *icsk = inet_csk(sp); 2196 const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq; 2197 u8 icsk_pending; 2198 int rx_queue; 2199 int state; 2200 2201 dest = &sp->sk_v6_daddr; 2202 src = &sp->sk_v6_rcv_saddr; 2203 destp = ntohs(inet->inet_dport); 2204 srcp = ntohs(inet->inet_sport); 2205 2206 icsk_pending = smp_load_acquire(&icsk->icsk_pending); 2207 if (icsk_pending == ICSK_TIME_RETRANS || 2208 icsk_pending == ICSK_TIME_REO_TIMEOUT || 2209 icsk_pending == ICSK_TIME_LOSS_PROBE) { 2210 timer_active = 1; 2211 timer_expires = icsk_timeout(icsk); 2212 } else if (icsk_pending == ICSK_TIME_PROBE0) { 2213 timer_active = 4; 2214 timer_expires = icsk_timeout(icsk); 2215 } else if (timer_pending(&sp->sk_timer)) { 2216 timer_active = 2; 2217 timer_expires = sp->sk_timer.expires; 2218 } else { 2219 timer_active = 0; 2220 timer_expires = jiffies; 2221 } 2222 2223 state = inet_sk_state_load(sp); 2224 if (state == TCP_LISTEN) 2225 rx_queue = READ_ONCE(sp->sk_ack_backlog); 2226 else 2227 /* Because we don't lock the socket, 2228 * we might find a transient negative value. 2229 */ 2230 rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) - 2231 READ_ONCE(tp->copied_seq), 0); 2232 2233 seq_printf(seq, 2234 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " 2235 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n", 2236 i, 2237 src->s6_addr32[0], src->s6_addr32[1], 2238 src->s6_addr32[2], src->s6_addr32[3], srcp, 2239 dest->s6_addr32[0], dest->s6_addr32[1], 2240 dest->s6_addr32[2], dest->s6_addr32[3], destp, 2241 state, 2242 READ_ONCE(tp->write_seq) - tp->snd_una, 2243 rx_queue, 2244 timer_active, 2245 jiffies_delta_to_clock_t(timer_expires - jiffies), 2246 READ_ONCE(icsk->icsk_retransmits), 2247 from_kuid_munged(seq_user_ns(seq), sk_uid(sp)), 2248 READ_ONCE(icsk->icsk_probes_out), 2249 sock_i_ino(sp), 2250 refcount_read(&sp->sk_refcnt), sp, 2251 jiffies_to_clock_t(icsk->icsk_rto), 2252 jiffies_to_clock_t(icsk->icsk_ack.ato), 2253 (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp), 2254 tcp_snd_cwnd(tp), 2255 state == TCP_LISTEN ? 
2256 fastopenq->max_qlen : 2257 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh) 2258 ); 2259 } 2260 2261 static void get_timewait6_sock(struct seq_file *seq, 2262 struct inet_timewait_sock *tw, int i) 2263 { 2264 long delta = tw->tw_timer.expires - jiffies; 2265 const struct in6_addr *dest, *src; 2266 __u16 destp, srcp; 2267 2268 dest = &tw->tw_v6_daddr; 2269 src = &tw->tw_v6_rcv_saddr; 2270 destp = ntohs(tw->tw_dport); 2271 srcp = ntohs(tw->tw_sport); 2272 2273 seq_printf(seq, 2274 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " 2275 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n", 2276 i, 2277 src->s6_addr32[0], src->s6_addr32[1], 2278 src->s6_addr32[2], src->s6_addr32[3], srcp, 2279 dest->s6_addr32[0], dest->s6_addr32[1], 2280 dest->s6_addr32[2], dest->s6_addr32[3], destp, 2281 READ_ONCE(tw->tw_substate), 0, 0, 2282 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0, 2283 refcount_read(&tw->tw_refcnt), tw); 2284 } 2285 2286 static int tcp6_seq_show(struct seq_file *seq, void *v) 2287 { 2288 struct tcp_iter_state *st; 2289 struct sock *sk = v; 2290 2291 if (v == SEQ_START_TOKEN) { 2292 seq_puts(seq, 2293 " sl " 2294 "local_address " 2295 "remote_address " 2296 "st tx_queue rx_queue tr tm->when retrnsmt" 2297 " uid timeout inode\n"); 2298 goto out; 2299 } 2300 st = seq->private; 2301 2302 if (sk->sk_state == TCP_TIME_WAIT) 2303 get_timewait6_sock(seq, v, st->num); 2304 else if (sk->sk_state == TCP_NEW_SYN_RECV) 2305 get_openreq6(seq, v, st->num); 2306 else 2307 get_tcp6_sock(seq, v, st->num); 2308 out: 2309 return 0; 2310 } 2311 2312 static const struct seq_operations tcp6_seq_ops = { 2313 .show = tcp6_seq_show, 2314 .start = tcp_seq_start, 2315 .next = tcp_seq_next, 2316 .stop = tcp_seq_stop, 2317 }; 2318 2319 static struct tcp_seq_afinfo tcp6_seq_afinfo = { 2320 .family = AF_INET6, 2321 }; 2322 2323 int __net_init tcp6_proc_init(struct net *net) 2324 { 2325 if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops, 2326 sizeof(struct tcp_iter_state), &tcp6_seq_afinfo)) 2327 return -ENOMEM; 2328 return 0; 2329 } 2330 2331 void tcp6_proc_exit(struct net *net) 2332 { 2333 remove_proc_entry("tcp6", net->proc_net); 2334 } 2335 #endif 2336 2337 struct proto tcpv6_prot = { 2338 .name = "TCPv6", 2339 .owner = THIS_MODULE, 2340 .close = tcp_close, 2341 .pre_connect = tcp_v6_pre_connect, 2342 .connect = tcp_v6_connect, 2343 .disconnect = tcp_disconnect, 2344 .accept = inet_csk_accept, 2345 .ioctl = tcp_ioctl, 2346 .init = tcp_v6_init_sock, 2347 .destroy = tcp_v4_destroy_sock, 2348 .shutdown = tcp_shutdown, 2349 .setsockopt = tcp_setsockopt, 2350 .getsockopt = tcp_getsockopt, 2351 .bpf_bypass_getsockopt = tcp_bpf_bypass_getsockopt, 2352 .keepalive = tcp_set_keepalive, 2353 .recvmsg = tcp_recvmsg, 2354 .sendmsg = tcp_sendmsg, 2355 .splice_eof = tcp_splice_eof, 2356 .backlog_rcv = tcp_v6_do_rcv, 2357 .release_cb = tcp_release_cb, 2358 .hash = inet_hash, 2359 .unhash = inet_unhash, 2360 .get_port = inet_csk_get_port, 2361 .put_port = inet_put_port, 2362 #ifdef CONFIG_BPF_SYSCALL 2363 .psock_update_sk_prot = tcp_bpf_update_proto, 2364 #endif 2365 .enter_memory_pressure = tcp_enter_memory_pressure, 2366 .leave_memory_pressure = tcp_leave_memory_pressure, 2367 .stream_memory_free = tcp_stream_memory_free, 2368 .sockets_allocated = &tcp_sockets_allocated, 2369 2370 .memory_allocated = &net_aligned_data.tcp_memory_allocated, 2371 .per_cpu_fw_alloc = &tcp_memory_per_cpu_fw_alloc, 2372 2373 .memory_pressure = &tcp_memory_pressure, 2374 .sysctl_mem = sysctl_tcp_mem, 2375 
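	/* The per-netns buffer limits are shared with IPv4: the offsets below
	 * resolve to ipv4.sysctl_tcp_{w,r}mem inside struct net, so there is
	 * no separate IPv6 copy of these sysctls.
	 */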
.sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem), 2376 .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem), 2377 .max_header = MAX_TCP_HEADER, 2378 .obj_size = sizeof(struct tcp6_sock), 2379 .ipv6_pinfo_offset = offsetof(struct tcp6_sock, inet6), 2380 .slab_flags = SLAB_TYPESAFE_BY_RCU, 2381 .twsk_prot = &tcp6_timewait_sock_ops, 2382 .rsk_prot = &tcp6_request_sock_ops, 2383 .h.hashinfo = NULL, 2384 .no_autobind = true, 2385 .diag_destroy = tcp_abort, 2386 }; 2387 EXPORT_SYMBOL_GPL(tcpv6_prot); 2388 2389 2390 static struct inet_protosw tcpv6_protosw = { 2391 .type = SOCK_STREAM, 2392 .protocol = IPPROTO_TCP, 2393 .prot = &tcpv6_prot, 2394 .ops = &inet6_stream_ops, 2395 .flags = INET_PROTOSW_PERMANENT | 2396 INET_PROTOSW_ICSK, 2397 }; 2398 2399 static int __net_init tcpv6_net_init(struct net *net) 2400 { 2401 int res; 2402 2403 res = inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6, 2404 SOCK_RAW, IPPROTO_TCP, net); 2405 if (!res) 2406 net->ipv6.tcp_sk->sk_clockid = CLOCK_MONOTONIC; 2407 2408 return res; 2409 } 2410 2411 static void __net_exit tcpv6_net_exit(struct net *net) 2412 { 2413 inet_ctl_sock_destroy(net->ipv6.tcp_sk); 2414 } 2415 2416 static struct pernet_operations tcpv6_net_ops = { 2417 .init = tcpv6_net_init, 2418 .exit = tcpv6_net_exit, 2419 }; 2420 2421 int __init tcpv6_init(void) 2422 { 2423 int ret; 2424 2425 net_hotdata.tcpv6_protocol = (struct inet6_protocol) { 2426 .handler = tcp_v6_rcv, 2427 .err_handler = tcp_v6_err, 2428 .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_FINAL, 2429 }; 2430 ret = inet6_add_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP); 2431 if (ret) 2432 goto out; 2433 2434 /* register inet6 protocol */ 2435 ret = inet6_register_protosw(&tcpv6_protosw); 2436 if (ret) 2437 goto out_tcpv6_protocol; 2438 2439 ret = register_pernet_subsys(&tcpv6_net_ops); 2440 if (ret) 2441 goto out_tcpv6_protosw; 2442 2443 ret = mptcpv6_init(); 2444 if (ret) 2445 goto out_tcpv6_pernet_subsys; 2446 2447 out: 2448 return ret; 2449 2450 out_tcpv6_pernet_subsys: 2451 unregister_pernet_subsys(&tcpv6_net_ops); 2452 out_tcpv6_protosw: 2453 inet6_unregister_protosw(&tcpv6_protosw); 2454 out_tcpv6_protocol: 2455 inet6_del_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP); 2456 goto out; 2457 } 2458 2459 void tcpv6_exit(void) 2460 { 2461 unregister_pernet_subsys(&tcpv6_net_ops); 2462 inet6_unregister_protosw(&tcpv6_protosw); 2463 inet6_del_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP); 2464 } 2465
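/* tcpv6_init() is not a module_init() hook; it is called from the IPv6
 * initialization path (see inet6_init() in af_inet6.c), with tcpv6_exit()
 * used on its unwind path.  If any step above fails, the error labels in
 * tcpv6_init() undo the earlier registrations in reverse order: pernet
 * subsys, then protosw, then the inet6 protocol handler.
 */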