// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	TCP over IPv6
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on:
 *	linux/net/ipv4/tcp.c
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allows both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 */

#include <linux/bottom_half.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
#include <linux/indirect_call_wrapper.h>

#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/inet6_connection_sock.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/snmp.h>
#include <net/dsfield.h>
#include <net/timewait_sock.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>
#include <net/busy_poll.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <crypto/hash.h>
#include <linux/scatterlist.h>

#include <trace/events/tcp.h>

static void	tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
static void	tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				      struct request_sock *req);

INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);

static const struct inet_connection_sock_af_ops ipv6_mapped;
const struct inet_connection_sock_af_ops ipv6_specific;
#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
#else
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
						   const struct in6_addr *addr,
						   int l3index)
{
	return NULL;
}
#endif

/* Helper returning the inet6 address from a given tcp socket.
 * It can be used in TCP stack instead of inet6_sk(sk).
 * This avoids a dereference and allows compiler optimizations.
 * It is a specialized version of inet6_sk_generic().
 */
static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
{
	unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);

	return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
}
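
/* For reference, a minimal sketch of the layout assumption behind the offset
 * arithmetic above (not a definition from this file; see struct tcp6_sock in
 * include/linux/ipv6.h for the real thing):
 *
 *	struct tcp6_sock {
 *		struct tcp_sock   tcp;
 *		struct ipv6_pinfo inet6;
 *	};
 *
 * i.e. the ipv6_pinfo is assumed to sit at the very end of the socket object,
 * so it can be located at a constant offset without loading
 * inet_sk(sk)->pinet6.
 */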

static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	if (dst && dst_hold_safe(dst)) {
		const struct rt6_info *rt = (const struct rt6_info *)dst;

		rcu_assign_pointer(sk->sk_rx_dst, dst);
		sk->sk_rx_dst_ifindex = skb->skb_iif;
		sk->sk_rx_dst_cookie = rt6_get_cookie(rt);
	}
}

static u32 tcp_v6_init_seq(const struct sk_buff *skb)
{
	return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
				ipv6_hdr(skb)->saddr.s6_addr32,
				tcp_hdr(skb)->dest,
				tcp_hdr(skb)->source);
}

static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
{
	return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
				   ipv6_hdr(skb)->saddr.s6_addr32);
}

static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
			      int addr_len)
{
	/* This check is replicated from tcp_v6_connect() and intended to
	 * prevent BPF program called below from accessing bytes that are out
	 * of the bound specified by user in addr_len.
	 */
	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	sock_owned_by_me(sk);

	return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
}
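
/* Illustrative only (userspace, not part of this file): the path below is
 * what services a plain AF_INET6 connect().  For a link-local peer the
 * caller must pin an interface, either via sin6_scope_id as sketched here or
 * by binding the socket to a device beforehand, otherwise tcp_v6_connect()
 * returns -EINVAL.  peer_addr stands in for the caller's destination:
 *
 *	struct sockaddr_in6 dst = {
 *		.sin6_family   = AF_INET6,
 *		.sin6_port     = htons(443),
 *		.sin6_addr     = peer_addr,		// e.g. fe80::1
 *		.sin6_scope_id = if_nametoindex("eth0"),
 *	};
 *	int fd = socket(AF_INET6, SOCK_STREAM, 0);
 *	connect(fd, (struct sockaddr *)&dst, sizeof(dst));
 */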

static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
			  int addr_len)
{
	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct in6_addr *saddr = NULL, *final_p, final;
	struct inet_timewait_death_row *tcp_death_row;
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6_txoptions *opt;
	struct dst_entry *dst;
	struct flowi6 fl6;
	int addr_type;
	int err;

	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	if (usin->sin6_family != AF_INET6)
		return -EAFNOSUPPORT;

	memset(&fl6, 0, sizeof(fl6));

	if (np->sndflow) {
		fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
		IP6_ECN_flow_init(fl6.flowlabel);
		if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
			struct ip6_flowlabel *flowlabel;
			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
			if (IS_ERR(flowlabel))
				return -EINVAL;
			fl6_sock_release(flowlabel);
		}
	}

	/*
	 *	connect() to INADDR_ANY means loopback (BSD'ism).
	 */

	if (ipv6_addr_any(&usin->sin6_addr)) {
		if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
			ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
					       &usin->sin6_addr);
		else
			usin->sin6_addr = in6addr_loopback;
	}

	addr_type = ipv6_addr_type(&usin->sin6_addr);

	if (addr_type & IPV6_ADDR_MULTICAST)
		return -ENETUNREACH;

	if (addr_type&IPV6_ADDR_LINKLOCAL) {
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			/* If interface is set while binding, indices
			 * must coincide.
			 */
			if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
				return -EINVAL;

			sk->sk_bound_dev_if = usin->sin6_scope_id;
		}

		/* Connect to link-local address requires an interface */
		if (!sk->sk_bound_dev_if)
			return -EINVAL;
	}

	if (tp->rx_opt.ts_recent_stamp &&
	    !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		WRITE_ONCE(tp->write_seq, 0);
	}

	sk->sk_v6_daddr = usin->sin6_addr;
	np->flow_label = fl6.flowlabel;

	/*
	 *	TCP over IPv4
	 */

	if (addr_type & IPV6_ADDR_MAPPED) {
		u32 exthdrlen = icsk->icsk_ext_hdr_len;
		struct sockaddr_in sin;

		if (ipv6_only_sock(sk))
			return -ENETUNREACH;

		sin.sin_family = AF_INET;
		sin.sin_port = usin->sin6_port;
		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

		icsk->icsk_af_ops = &ipv6_mapped;
		if (sk_is_mptcp(sk))
			mptcpv6_handle_mapped(sk, true);
		sk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

		if (err) {
			icsk->icsk_ext_hdr_len = exthdrlen;
			icsk->icsk_af_ops = &ipv6_specific;
			if (sk_is_mptcp(sk))
				mptcpv6_handle_mapped(sk, false);
			sk->sk_backlog_rcv = tcp_v6_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
			tp->af_specific = &tcp_sock_ipv6_specific;
#endif
			goto failure;
		}
		np->saddr = sk->sk_v6_rcv_saddr;

		return err;
	}

	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
		saddr = &sk->sk_v6_rcv_saddr;

	fl6.flowi6_proto = IPPROTO_TCP;
	fl6.daddr = sk->sk_v6_daddr;
	fl6.saddr = saddr ? *saddr : np->saddr;
	fl6.flowi6_oif = sk->sk_bound_dev_if;
	fl6.flowi6_mark = sk->sk_mark;
	fl6.fl6_dport = usin->sin6_port;
	fl6.fl6_sport = inet->inet_sport;
	fl6.flowi6_uid = sk->sk_uid;

	opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
	final_p = fl6_update_dst(&fl6, opt, &final);

	security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));

	dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p);
	if (IS_ERR(dst)) {
		err = PTR_ERR(dst);
		goto failure;
	}

	tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;

	if (!saddr) {
		struct inet_bind_hashbucket *prev_addr_hashbucket = NULL;
		struct in6_addr prev_v6_rcv_saddr;

		if (icsk->icsk_bind2_hash) {
			prev_addr_hashbucket = inet_bhashfn_portaddr(tcp_death_row->hashinfo,
								     sk, net, inet->inet_num);
			prev_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
		}
		saddr = &fl6.saddr;
		sk->sk_v6_rcv_saddr = *saddr;

		if (prev_addr_hashbucket) {
			err = inet_bhash2_update_saddr(prev_addr_hashbucket, sk);
			if (err) {
				sk->sk_v6_rcv_saddr = prev_v6_rcv_saddr;
				goto failure;
			}
		}
	}

	/* set the source address */
	np->saddr = *saddr;
	inet->inet_rcv_saddr = LOOPBACK4_IPV6;

	sk->sk_gso_type = SKB_GSO_TCPV6;
	ip6_dst_store(sk, dst, NULL, NULL);

	icsk->icsk_ext_hdr_len = 0;
	if (opt)
		icsk->icsk_ext_hdr_len = opt->opt_flen +
					 opt->opt_nflen;

	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

	inet->inet_dport = usin->sin6_port;

	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet6_hash_connect(tcp_death_row, sk);
	if (err)
		goto late_failure;

	sk_set_txhash(sk);

	if (likely(!tp->repair)) {
		if (!tp->write_seq)
			WRITE_ONCE(tp->write_seq,
				   secure_tcpv6_seq(np->saddr.s6_addr32,
						    sk->sk_v6_daddr.s6_addr32,
						    inet->inet_sport,
						    inet->inet_dport));
		tp->tsoffset = secure_tcpv6_ts_off(net, np->saddr.s6_addr32,
						   sk->sk_v6_daddr.s6_addr32);
	}

	if (tcp_fastopen_defer_connect(sk, &err))
		return err;
	if (err)
		goto late_failure;

	err = tcp_connect(sk);
	if (err)
		goto late_failure;

	return 0;

late_failure:
	tcp_set_state(sk, TCP_CLOSE);
failure:
	inet->inet_dport = 0;
	sk->sk_route_caps = 0;
	return err;
}

static void tcp_v6_mtu_reduced(struct sock *sk)
{
	struct dst_entry *dst;
	u32 mtu;

	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
		return;

	mtu = READ_ONCE(tcp_sk(sk)->mtu_info);

	/* Drop requests trying to increase our current mss.
	 * Check done in __ip6_rt_update_pmtu() is too late.
	 */
	if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
		return;

	dst = inet6_csk_update_pmtu(sk, mtu);
	if (!dst)
		return;

	if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
		tcp_sync_mss(sk, dst_mtu(dst));
		tcp_simple_retransmit(sk);
	}
}

static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		      u8 type, u8 code, int offset, __be32 info)
{
	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
	struct net *net = dev_net(skb->dev);
	struct request_sock *fastopen;
	struct ipv6_pinfo *np;
	struct tcp_sock *tp;
	__u32 seq, snd_una;
	struct sock *sk;
	bool fatal;
	int err;

	sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
					&hdr->daddr, th->dest,
					&hdr->saddr, ntohs(th->source),
					skb->dev->ifindex, inet6_sdif(skb));

	if (!sk) {
		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
				  ICMP6_MIB_INERRORS);
		return -ENOENT;
	}

	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return 0;
	}
	seq = ntohl(th->seq);
	fatal = icmpv6_err_convert(type, code, &err);
	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		tcp_req_err(sk, seq, fatal);
		return 0;
	}

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (static_branch_unlikely(&ip6_min_hopcount)) {
		/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
		if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
			goto out;
		}
	}

	tp = tcp_sk(sk);
	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
	fastopen = rcu_dereference(tp->fastopen_rsk);
	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, snd_una, tp->snd_nxt)) {
		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = tcp_inet6_sk(sk);

	if (type == NDISC_REDIRECT) {
		if (!sock_owned_by_user(sk)) {
			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);

			if (dst)
				dst->ops->redirect(dst, sk, skb);
		}
		goto out;
	}

	if (type == ICMPV6_PKT_TOOBIG) {
		u32 mtu = ntohl(info);

		/* We are not interested in TCP_LISTEN and open_requests
		 * (SYN-ACKs sent out by Linux are always < 576 bytes so
		 * they should go through unfragmented).
		 */
		if (sk->sk_state == TCP_LISTEN)
			goto out;

		if (!ip6_sk_accept_pmtu(sk))
			goto out;

		if (mtu < IPV6_MIN_MTU)
			goto out;

		WRITE_ONCE(tp->mtu_info, mtu);

		if (!sock_owned_by_user(sk))
			tcp_v6_mtu_reduced(sk);
		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
					   &sk->sk_tsq_flags))
			sock_hold(sk);
		goto out;
	}

	/* Might be for a request_sock */
	switch (sk->sk_state) {
	case TCP_SYN_SENT:
	case TCP_SYN_RECV:
		/* Only in fast or simultaneous open. If a fast open socket is
		 * already accepted it is treated as a connected one below.
		 */
		if (fastopen && !fastopen->sk)
			break;

		ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);

		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;
			sk_error_report(sk);	/* Wake people up to see the error (see connect in sock.c) */

			tcp_done(sk);
		} else
			sk->sk_err_soft = err;
		goto out;
	case TCP_LISTEN:
		break;
	default:
		/* check if this ICMP message allows revert of backoff.
		 * (see RFC 6069)
		 */
		if (!fastopen && type == ICMPV6_DEST_UNREACH &&
		    code == ICMPV6_NOROUTE)
			tcp_ld_RTO_revert(sk, seq);
	}

	if (!sock_owned_by_user(sk) && np->recverr) {
		sk->sk_err = err;
		sk_error_report(sk);
	} else
		sk->sk_err_soft = err;

out:
	bh_unlock_sock(sk);
	sock_put(sk);
	return 0;
}
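
/* Illustrative only (userspace, not part of this file): with IPV6_RECVERR
 * enabled, the soft errors recorded above surface on the socket error queue
 * rather than only through a later failing send/recv, roughly:
 *
 *	int on = 1;
 *	setsockopt(fd, IPPROTO_IPV6, IPV6_RECVERR, &on, sizeof(on));
 *	...
 *	recvmsg(fd, &msg, MSG_ERRQUEUE);	// yields a sock_extended_err cmsg
 *
 * where msg is the caller's own struct msghdr.
 */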

static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
			      struct flowi *fl,
			      struct request_sock *req,
			      struct tcp_fastopen_cookie *foc,
			      enum tcp_synack_type synack_type,
			      struct sk_buff *syn_skb)
{
	struct inet_request_sock *ireq = inet_rsk(req);
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct ipv6_txoptions *opt;
	struct flowi6 *fl6 = &fl->u.ip6;
	struct sk_buff *skb;
	int err = -ENOMEM;
	u8 tclass;

	/* First, grab a route. */
	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
					       IPPROTO_TCP)) == NULL)
		goto done;

	skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);

	if (skb) {
		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
				    &ireq->ir_v6_rmt_addr);

		fl6->daddr = ireq->ir_v6_rmt_addr;
		if (np->repflow && ireq->pktopts)
			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));

		tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
				(tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
				(np->tclass & INET_ECN_MASK) :
				np->tclass;

		if (!INET_ECN_is_capable(tclass) &&
		    tcp_bpf_ca_needs_ecn((struct sock *)req))
			tclass |= INET_ECN_ECT_0;

		rcu_read_lock();
		opt = ireq->ipv6_opt;
		if (!opt)
			opt = rcu_dereference(np->opt);
		err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
			       tclass, sk->sk_priority);
		rcu_read_unlock();
		err = net_xmit_eval(err);
	}

done:
	return err;
}

static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
	kfree(inet_rsk(req)->ipv6_opt);
	consume_skb(inet_rsk(req)->pktopts);
}

#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
						   const struct in6_addr *addr,
						   int l3index)
{
	return tcp_md5_do_lookup(sk, l3index,
				 (union tcp_md5_addr *)addr, AF_INET6);
}

static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
						const struct sock *addr_sk)
{
	int l3index;

	l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
						 addr_sk->sk_bound_dev_if);
	return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
				    l3index);
}

static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
				 sockptr_t optval, int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
	int l3index = 0;
	u8 prefixlen;
	u8 flags;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin6->sin6_family != AF_INET6)
		return -EINVAL;

	flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;

	if (optname == TCP_MD5SIG_EXT &&
	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
		prefixlen = cmd.tcpm_prefixlen;
		if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
					prefixlen > 32))
			return -EINVAL;
	} else {
		prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
	}

	if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex &&
	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
		struct net_device *dev;

		rcu_read_lock();
		dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
		if (dev && netif_is_l3_master(dev))
			l3index = dev->ifindex;
		rcu_read_unlock();

		/* ok to reference set/not set outside of rcu;
		 * right now device MUST be an L3 master
		 */
		if (!dev || !l3index)
			return -EINVAL;
	}

	if (!cmd.tcpm_keylen) {
		if (ipv6_addr_v4mapped(&sin6->sin6_addr))
			return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
					      AF_INET, prefixlen,
					      l3index, flags);
		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
				      AF_INET6, prefixlen, l3index, flags);
	}

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	if (ipv6_addr_v4mapped(&sin6->sin6_addr))
		return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
				      AF_INET, prefixlen, l3index, flags,
				      cmd.tcpm_key, cmd.tcpm_keylen,
				      GFP_KERNEL);

	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
			      AF_INET6, prefixlen, l3index, flags,
			      cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
}
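
/* Illustrative only (userspace, not part of this file): the parser above is
 * reached through setsockopt(TCP_MD5SIG) or setsockopt(TCP_MD5SIG_EXT) on an
 * AF_INET6 TCP socket, roughly:
 *
 *	struct tcp_md5sig md5 = { .tcpm_keylen = keylen };
 *
 *	memcpy(&md5.tcpm_addr, &peer_sin6, sizeof(peer_sin6));
 *	memcpy(md5.tcpm_key, key, keylen);
 *	setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
 *
 * where peer_sin6/key/keylen are the caller's peer address and shared
 * secret; TCP_MD5SIG_EXT additionally honours tcpm_prefixlen and
 * tcpm_ifindex as handled above.
 */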

static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
				   const struct in6_addr *daddr,
				   const struct in6_addr *saddr,
				   const struct tcphdr *th, int nbytes)
{
	struct tcp6_pseudohdr *bp;
	struct scatterlist sg;
	struct tcphdr *_th;

	bp = hp->scratch;
	/* 1.
TCP pseudo-header (RFC2460) */ 697 bp->saddr = *saddr; 698 bp->daddr = *daddr; 699 bp->protocol = cpu_to_be32(IPPROTO_TCP); 700 bp->len = cpu_to_be32(nbytes); 701 702 _th = (struct tcphdr *)(bp + 1); 703 memcpy(_th, th, sizeof(*th)); 704 _th->check = 0; 705 706 sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th)); 707 ahash_request_set_crypt(hp->md5_req, &sg, NULL, 708 sizeof(*bp) + sizeof(*th)); 709 return crypto_ahash_update(hp->md5_req); 710 } 711 712 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, 713 const struct in6_addr *daddr, struct in6_addr *saddr, 714 const struct tcphdr *th) 715 { 716 struct tcp_md5sig_pool *hp; 717 struct ahash_request *req; 718 719 hp = tcp_get_md5sig_pool(); 720 if (!hp) 721 goto clear_hash_noput; 722 req = hp->md5_req; 723 724 if (crypto_ahash_init(req)) 725 goto clear_hash; 726 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2)) 727 goto clear_hash; 728 if (tcp_md5_hash_key(hp, key)) 729 goto clear_hash; 730 ahash_request_set_crypt(req, NULL, md5_hash, 0); 731 if (crypto_ahash_final(req)) 732 goto clear_hash; 733 734 tcp_put_md5sig_pool(); 735 return 0; 736 737 clear_hash: 738 tcp_put_md5sig_pool(); 739 clear_hash_noput: 740 memset(md5_hash, 0, 16); 741 return 1; 742 } 743 744 static int tcp_v6_md5_hash_skb(char *md5_hash, 745 const struct tcp_md5sig_key *key, 746 const struct sock *sk, 747 const struct sk_buff *skb) 748 { 749 const struct in6_addr *saddr, *daddr; 750 struct tcp_md5sig_pool *hp; 751 struct ahash_request *req; 752 const struct tcphdr *th = tcp_hdr(skb); 753 754 if (sk) { /* valid for establish/request sockets */ 755 saddr = &sk->sk_v6_rcv_saddr; 756 daddr = &sk->sk_v6_daddr; 757 } else { 758 const struct ipv6hdr *ip6h = ipv6_hdr(skb); 759 saddr = &ip6h->saddr; 760 daddr = &ip6h->daddr; 761 } 762 763 hp = tcp_get_md5sig_pool(); 764 if (!hp) 765 goto clear_hash_noput; 766 req = hp->md5_req; 767 768 if (crypto_ahash_init(req)) 769 goto clear_hash; 770 771 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len)) 772 goto clear_hash; 773 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2)) 774 goto clear_hash; 775 if (tcp_md5_hash_key(hp, key)) 776 goto clear_hash; 777 ahash_request_set_crypt(req, NULL, md5_hash, 0); 778 if (crypto_ahash_final(req)) 779 goto clear_hash; 780 781 tcp_put_md5sig_pool(); 782 return 0; 783 784 clear_hash: 785 tcp_put_md5sig_pool(); 786 clear_hash_noput: 787 memset(md5_hash, 0, 16); 788 return 1; 789 } 790 791 #endif 792 793 static void tcp_v6_init_req(struct request_sock *req, 794 const struct sock *sk_listener, 795 struct sk_buff *skb) 796 { 797 bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags); 798 struct inet_request_sock *ireq = inet_rsk(req); 799 const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener); 800 801 ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; 802 ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; 803 804 /* So that link locals have meaning */ 805 if ((!sk_listener->sk_bound_dev_if || l3_slave) && 806 ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) 807 ireq->ir_iif = tcp_v6_iif(skb); 808 809 if (!TCP_SKB_CB(skb)->tcp_tw_isn && 810 (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) || 811 np->rxopt.bits.rxinfo || 812 np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || 813 np->rxopt.bits.rxohlim || np->repflow)) { 814 refcount_inc(&skb->users); 815 ireq->pktopts = skb; 816 } 817 } 818 819 static struct dst_entry *tcp_v6_route_req(const struct sock *sk, 820 struct sk_buff *skb, 821 struct flowi *fl, 822 struct request_sock 
*req) 823 { 824 tcp_v6_init_req(req, sk, skb); 825 826 if (security_inet_conn_request(sk, skb, req)) 827 return NULL; 828 829 return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP); 830 } 831 832 struct request_sock_ops tcp6_request_sock_ops __read_mostly = { 833 .family = AF_INET6, 834 .obj_size = sizeof(struct tcp6_request_sock), 835 .rtx_syn_ack = tcp_rtx_synack, 836 .send_ack = tcp_v6_reqsk_send_ack, 837 .destructor = tcp_v6_reqsk_destructor, 838 .send_reset = tcp_v6_send_reset, 839 .syn_ack_timeout = tcp_syn_ack_timeout, 840 }; 841 842 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { 843 .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - 844 sizeof(struct ipv6hdr), 845 #ifdef CONFIG_TCP_MD5SIG 846 .req_md5_lookup = tcp_v6_md5_lookup, 847 .calc_md5_hash = tcp_v6_md5_hash_skb, 848 #endif 849 #ifdef CONFIG_SYN_COOKIES 850 .cookie_init_seq = cookie_v6_init_sequence, 851 #endif 852 .route_req = tcp_v6_route_req, 853 .init_seq = tcp_v6_init_seq, 854 .init_ts_off = tcp_v6_init_ts_off, 855 .send_synack = tcp_v6_send_synack, 856 }; 857 858 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq, 859 u32 ack, u32 win, u32 tsval, u32 tsecr, 860 int oif, struct tcp_md5sig_key *key, int rst, 861 u8 tclass, __be32 label, u32 priority, u32 txhash) 862 { 863 const struct tcphdr *th = tcp_hdr(skb); 864 struct tcphdr *t1; 865 struct sk_buff *buff; 866 struct flowi6 fl6; 867 struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev); 868 struct sock *ctl_sk = net->ipv6.tcp_sk; 869 unsigned int tot_len = sizeof(struct tcphdr); 870 __be32 mrst = 0, *topt; 871 struct dst_entry *dst; 872 __u32 mark = 0; 873 874 if (tsecr) 875 tot_len += TCPOLEN_TSTAMP_ALIGNED; 876 #ifdef CONFIG_TCP_MD5SIG 877 if (key) 878 tot_len += TCPOLEN_MD5SIG_ALIGNED; 879 #endif 880 881 #ifdef CONFIG_MPTCP 882 if (rst && !key) { 883 mrst = mptcp_reset_option(skb); 884 885 if (mrst) 886 tot_len += sizeof(__be32); 887 } 888 #endif 889 890 buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC); 891 if (!buff) 892 return; 893 894 skb_reserve(buff, MAX_TCP_HEADER); 895 896 t1 = skb_push(buff, tot_len); 897 skb_reset_transport_header(buff); 898 899 /* Swap the send and the receive. 
*/ 900 memset(t1, 0, sizeof(*t1)); 901 t1->dest = th->source; 902 t1->source = th->dest; 903 t1->doff = tot_len / 4; 904 t1->seq = htonl(seq); 905 t1->ack_seq = htonl(ack); 906 t1->ack = !rst || !th->ack; 907 t1->rst = rst; 908 t1->window = htons(win); 909 910 topt = (__be32 *)(t1 + 1); 911 912 if (tsecr) { 913 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | 914 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); 915 *topt++ = htonl(tsval); 916 *topt++ = htonl(tsecr); 917 } 918 919 if (mrst) 920 *topt++ = mrst; 921 922 #ifdef CONFIG_TCP_MD5SIG 923 if (key) { 924 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | 925 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG); 926 tcp_v6_md5_hash_hdr((__u8 *)topt, key, 927 &ipv6_hdr(skb)->saddr, 928 &ipv6_hdr(skb)->daddr, t1); 929 } 930 #endif 931 932 memset(&fl6, 0, sizeof(fl6)); 933 fl6.daddr = ipv6_hdr(skb)->saddr; 934 fl6.saddr = ipv6_hdr(skb)->daddr; 935 fl6.flowlabel = label; 936 937 buff->ip_summed = CHECKSUM_PARTIAL; 938 939 __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr); 940 941 fl6.flowi6_proto = IPPROTO_TCP; 942 if (rt6_need_strict(&fl6.daddr) && !oif) 943 fl6.flowi6_oif = tcp_v6_iif(skb); 944 else { 945 if (!oif && netif_index_is_l3_master(net, skb->skb_iif)) 946 oif = skb->skb_iif; 947 948 fl6.flowi6_oif = oif; 949 } 950 951 if (sk) { 952 if (sk->sk_state == TCP_TIME_WAIT) 953 mark = inet_twsk(sk)->tw_mark; 954 else 955 mark = sk->sk_mark; 956 skb_set_delivery_time(buff, tcp_transmit_time(sk), true); 957 } 958 if (txhash) { 959 /* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */ 960 skb_set_hash(buff, txhash, PKT_HASH_TYPE_L4); 961 } 962 fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark; 963 fl6.fl6_dport = t1->dest; 964 fl6.fl6_sport = t1->source; 965 fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL); 966 security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6)); 967 968 /* Pass a socket to ip6_dst_lookup either it is for RST 969 * Underlying function will use this to retrieve the network 970 * namespace 971 */ 972 if (sk && sk->sk_state != TCP_TIME_WAIT) 973 dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /*sk's xfrm_policy can be referred*/ 974 else 975 dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL); 976 if (!IS_ERR(dst)) { 977 skb_dst_set(buff, dst); 978 ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, 979 tclass & ~INET_ECN_MASK, priority); 980 TCP_INC_STATS(net, TCP_MIB_OUTSEGS); 981 if (rst) 982 TCP_INC_STATS(net, TCP_MIB_OUTRSTS); 983 return; 984 } 985 986 kfree_skb(buff); 987 } 988 989 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) 990 { 991 const struct tcphdr *th = tcp_hdr(skb); 992 struct ipv6hdr *ipv6h = ipv6_hdr(skb); 993 u32 seq = 0, ack_seq = 0; 994 struct tcp_md5sig_key *key = NULL; 995 #ifdef CONFIG_TCP_MD5SIG 996 const __u8 *hash_location = NULL; 997 unsigned char newhash[16]; 998 int genhash; 999 struct sock *sk1 = NULL; 1000 #endif 1001 __be32 label = 0; 1002 u32 priority = 0; 1003 struct net *net; 1004 u32 txhash = 0; 1005 int oif = 0; 1006 1007 if (th->rst) 1008 return; 1009 1010 /* If sk not NULL, it means we did a successful lookup and incoming 1011 * route had to be correct. prequeue might have dropped our dst. 1012 */ 1013 if (!sk && !ipv6_unicast_destination(skb)) 1014 return; 1015 1016 net = sk ? 
sock_net(sk) : dev_net(skb_dst(skb)->dev); 1017 #ifdef CONFIG_TCP_MD5SIG 1018 rcu_read_lock(); 1019 hash_location = tcp_parse_md5sig_option(th); 1020 if (sk && sk_fullsock(sk)) { 1021 int l3index; 1022 1023 /* sdif set, means packet ingressed via a device 1024 * in an L3 domain and inet_iif is set to it. 1025 */ 1026 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; 1027 key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index); 1028 } else if (hash_location) { 1029 int dif = tcp_v6_iif_l3_slave(skb); 1030 int sdif = tcp_v6_sdif(skb); 1031 int l3index; 1032 1033 /* 1034 * active side is lost. Try to find listening socket through 1035 * source port, and then find md5 key through listening socket. 1036 * we are not loose security here: 1037 * Incoming packet is checked with md5 hash with finding key, 1038 * no RST generated if md5 hash doesn't match. 1039 */ 1040 sk1 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo, 1041 NULL, 0, &ipv6h->saddr, th->source, 1042 &ipv6h->daddr, ntohs(th->source), 1043 dif, sdif); 1044 if (!sk1) 1045 goto out; 1046 1047 /* sdif set, means packet ingressed via a device 1048 * in an L3 domain and dif is set to it. 1049 */ 1050 l3index = tcp_v6_sdif(skb) ? dif : 0; 1051 1052 key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index); 1053 if (!key) 1054 goto out; 1055 1056 genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb); 1057 if (genhash || memcmp(hash_location, newhash, 16) != 0) 1058 goto out; 1059 } 1060 #endif 1061 1062 if (th->ack) 1063 seq = ntohl(th->ack_seq); 1064 else 1065 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len - 1066 (th->doff << 2); 1067 1068 if (sk) { 1069 oif = sk->sk_bound_dev_if; 1070 if (sk_fullsock(sk)) { 1071 const struct ipv6_pinfo *np = tcp_inet6_sk(sk); 1072 1073 trace_tcp_send_reset(sk, skb); 1074 if (np->repflow) 1075 label = ip6_flowlabel(ipv6h); 1076 priority = sk->sk_priority; 1077 txhash = sk->sk_hash; 1078 } 1079 if (sk->sk_state == TCP_TIME_WAIT) { 1080 label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel); 1081 priority = inet_twsk(sk)->tw_priority; 1082 txhash = inet_twsk(sk)->tw_txhash; 1083 } 1084 } else { 1085 if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET) 1086 label = ip6_flowlabel(ipv6h); 1087 } 1088 1089 tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 1090 ipv6_get_dsfield(ipv6h), label, priority, txhash); 1091 1092 #ifdef CONFIG_TCP_MD5SIG 1093 out: 1094 rcu_read_unlock(); 1095 #endif 1096 } 1097 1098 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq, 1099 u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, 1100 struct tcp_md5sig_key *key, u8 tclass, 1101 __be32 label, u32 priority, u32 txhash) 1102 { 1103 tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0, 1104 tclass, label, priority, txhash); 1105 } 1106 1107 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) 1108 { 1109 struct inet_timewait_sock *tw = inet_twsk(sk); 1110 struct tcp_timewait_sock *tcptw = tcp_twsk(sk); 1111 1112 tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, 1113 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, 1114 tcp_time_stamp_raw() + tcptw->tw_ts_offset, 1115 tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw), 1116 tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority, 1117 tw->tw_txhash); 1118 1119 inet_twsk_put(tw); 1120 } 1121 1122 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, 1123 struct request_sock *req) 1124 { 1125 int l3index; 1126 1127 l3index 
= tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; 1128 1129 /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV 1130 * sk->sk_state == TCP_SYN_RECV -> for Fast Open. 1131 */ 1132 /* RFC 7323 2.3 1133 * The window field (SEG.WND) of every outgoing segment, with the 1134 * exception of <SYN> segments, MUST be right-shifted by 1135 * Rcv.Wind.Shift bits: 1136 */ 1137 tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ? 1138 tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, 1139 tcp_rsk(req)->rcv_nxt, 1140 req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, 1141 tcp_time_stamp_raw() + tcp_rsk(req)->ts_off, 1142 req->ts_recent, sk->sk_bound_dev_if, 1143 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index), 1144 ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority, 1145 tcp_rsk(req)->txhash); 1146 } 1147 1148 1149 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb) 1150 { 1151 #ifdef CONFIG_SYN_COOKIES 1152 const struct tcphdr *th = tcp_hdr(skb); 1153 1154 if (!th->syn) 1155 sk = cookie_v6_check(sk, skb); 1156 #endif 1157 return sk; 1158 } 1159 1160 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph, 1161 struct tcphdr *th, u32 *cookie) 1162 { 1163 u16 mss = 0; 1164 #ifdef CONFIG_SYN_COOKIES 1165 mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops, 1166 &tcp_request_sock_ipv6_ops, sk, th); 1167 if (mss) { 1168 *cookie = __cookie_v6_init_sequence(iph, th, &mss); 1169 tcp_synq_overflow(sk); 1170 } 1171 #endif 1172 return mss; 1173 } 1174 1175 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) 1176 { 1177 if (skb->protocol == htons(ETH_P_IP)) 1178 return tcp_v4_conn_request(sk, skb); 1179 1180 if (!ipv6_unicast_destination(skb)) 1181 goto drop; 1182 1183 if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) { 1184 __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS); 1185 return 0; 1186 } 1187 1188 return tcp_conn_request(&tcp6_request_sock_ops, 1189 &tcp_request_sock_ipv6_ops, sk, skb); 1190 1191 drop: 1192 tcp_listendrop(sk); 1193 return 0; /* don't send reset */ 1194 } 1195 1196 static void tcp_v6_restore_cb(struct sk_buff *skb) 1197 { 1198 /* We need to move header back to the beginning if xfrm6_policy_check() 1199 * and tcp_v6_fill_cb() are going to be called again. 1200 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there. 
1201 */ 1202 memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6, 1203 sizeof(struct inet6_skb_parm)); 1204 } 1205 1206 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, 1207 struct request_sock *req, 1208 struct dst_entry *dst, 1209 struct request_sock *req_unhash, 1210 bool *own_req) 1211 { 1212 struct inet_request_sock *ireq; 1213 struct ipv6_pinfo *newnp; 1214 const struct ipv6_pinfo *np = tcp_inet6_sk(sk); 1215 struct ipv6_txoptions *opt; 1216 struct inet_sock *newinet; 1217 bool found_dup_sk = false; 1218 struct tcp_sock *newtp; 1219 struct sock *newsk; 1220 #ifdef CONFIG_TCP_MD5SIG 1221 struct tcp_md5sig_key *key; 1222 int l3index; 1223 #endif 1224 struct flowi6 fl6; 1225 1226 if (skb->protocol == htons(ETH_P_IP)) { 1227 /* 1228 * v6 mapped 1229 */ 1230 1231 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst, 1232 req_unhash, own_req); 1233 1234 if (!newsk) 1235 return NULL; 1236 1237 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk); 1238 1239 newnp = tcp_inet6_sk(newsk); 1240 newtp = tcp_sk(newsk); 1241 1242 memcpy(newnp, np, sizeof(struct ipv6_pinfo)); 1243 1244 newnp->saddr = newsk->sk_v6_rcv_saddr; 1245 1246 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped; 1247 if (sk_is_mptcp(newsk)) 1248 mptcpv6_handle_mapped(newsk, true); 1249 newsk->sk_backlog_rcv = tcp_v4_do_rcv; 1250 #ifdef CONFIG_TCP_MD5SIG 1251 newtp->af_specific = &tcp_sock_ipv6_mapped_specific; 1252 #endif 1253 1254 newnp->ipv6_mc_list = NULL; 1255 newnp->ipv6_ac_list = NULL; 1256 newnp->ipv6_fl_list = NULL; 1257 newnp->pktoptions = NULL; 1258 newnp->opt = NULL; 1259 newnp->mcast_oif = inet_iif(skb); 1260 newnp->mcast_hops = ip_hdr(skb)->ttl; 1261 newnp->rcv_flowinfo = 0; 1262 if (np->repflow) 1263 newnp->flow_label = 0; 1264 1265 /* 1266 * No need to charge this sock to the relevant IPv6 refcnt debug socks count 1267 * here, tcp_create_openreq_child now does this for us, see the comment in 1268 * that function for the gory details. -acme 1269 */ 1270 1271 /* It is tricky place. Until this moment IPv4 tcp 1272 worked with IPv6 icsk.icsk_af_ops. 1273 Sync it now. 1274 */ 1275 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie); 1276 1277 return newsk; 1278 } 1279 1280 ireq = inet_rsk(req); 1281 1282 if (sk_acceptq_is_full(sk)) 1283 goto out_overflow; 1284 1285 if (!dst) { 1286 dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP); 1287 if (!dst) 1288 goto out; 1289 } 1290 1291 newsk = tcp_create_openreq_child(sk, req, skb); 1292 if (!newsk) 1293 goto out_nonewsk; 1294 1295 /* 1296 * No need to charge this sock to the relevant IPv6 refcnt debug socks 1297 * count here, tcp_create_openreq_child now does this for us, see the 1298 * comment in that function for the gory details. -acme 1299 */ 1300 1301 newsk->sk_gso_type = SKB_GSO_TCPV6; 1302 ip6_dst_store(newsk, dst, NULL, NULL); 1303 inet6_sk_rx_dst_set(newsk, skb); 1304 1305 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk); 1306 1307 newtp = tcp_sk(newsk); 1308 newinet = inet_sk(newsk); 1309 newnp = tcp_inet6_sk(newsk); 1310 1311 memcpy(newnp, np, sizeof(struct ipv6_pinfo)); 1312 1313 newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr; 1314 newnp->saddr = ireq->ir_v6_loc_addr; 1315 newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr; 1316 newsk->sk_bound_dev_if = ireq->ir_iif; 1317 1318 /* Now IPv6 options... 1319 1320 First: no IPv4 options. 
1321 */ 1322 newinet->inet_opt = NULL; 1323 newnp->ipv6_mc_list = NULL; 1324 newnp->ipv6_ac_list = NULL; 1325 newnp->ipv6_fl_list = NULL; 1326 1327 /* Clone RX bits */ 1328 newnp->rxopt.all = np->rxopt.all; 1329 1330 newnp->pktoptions = NULL; 1331 newnp->opt = NULL; 1332 newnp->mcast_oif = tcp_v6_iif(skb); 1333 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; 1334 newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb)); 1335 if (np->repflow) 1336 newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb)); 1337 1338 /* Set ToS of the new socket based upon the value of incoming SYN. 1339 * ECT bits are set later in tcp_init_transfer(). 1340 */ 1341 if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)) 1342 newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK; 1343 1344 /* Clone native IPv6 options from listening socket (if any) 1345 1346 Yes, keeping reference count would be much more clever, 1347 but we make one more one thing there: reattach optmem 1348 to newsk. 1349 */ 1350 opt = ireq->ipv6_opt; 1351 if (!opt) 1352 opt = rcu_dereference(np->opt); 1353 if (opt) { 1354 opt = ipv6_dup_options(newsk, opt); 1355 RCU_INIT_POINTER(newnp->opt, opt); 1356 } 1357 inet_csk(newsk)->icsk_ext_hdr_len = 0; 1358 if (opt) 1359 inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen + 1360 opt->opt_flen; 1361 1362 tcp_ca_openreq_child(newsk, dst); 1363 1364 tcp_sync_mss(newsk, dst_mtu(dst)); 1365 newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst)); 1366 1367 tcp_initialize_rcv_mss(newsk); 1368 1369 newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6; 1370 newinet->inet_rcv_saddr = LOOPBACK4_IPV6; 1371 1372 #ifdef CONFIG_TCP_MD5SIG 1373 l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif); 1374 1375 /* Copy over the MD5 key from the original socket */ 1376 key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index); 1377 if (key) { 1378 /* We're using one, so create a matching key 1379 * on the newsk structure. If we fail to get 1380 * memory, then we end up not copying the key 1381 * across. Shucks. 1382 */ 1383 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr, 1384 AF_INET6, 128, l3index, key->flags, key->key, key->keylen, 1385 sk_gfp_mask(sk, GFP_ATOMIC)); 1386 } 1387 #endif 1388 1389 if (__inet_inherit_port(sk, newsk) < 0) { 1390 inet_csk_prepare_forced_close(newsk); 1391 tcp_done(newsk); 1392 goto out; 1393 } 1394 *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), 1395 &found_dup_sk); 1396 if (*own_req) { 1397 tcp_move_syn(newtp, req); 1398 1399 /* Clone pktoptions received with SYN, if we own the req */ 1400 if (ireq->pktopts) { 1401 newnp->pktoptions = skb_clone(ireq->pktopts, 1402 sk_gfp_mask(sk, GFP_ATOMIC)); 1403 consume_skb(ireq->pktopts); 1404 ireq->pktopts = NULL; 1405 if (newnp->pktoptions) { 1406 tcp_v6_restore_cb(newnp->pktoptions); 1407 skb_set_owner_r(newnp->pktoptions, newsk); 1408 } 1409 } 1410 } else { 1411 if (!req_unhash && found_dup_sk) { 1412 /* This code path should only be executed in the 1413 * syncookie case only 1414 */ 1415 bh_unlock_sock(newsk); 1416 sock_put(newsk); 1417 newsk = NULL; 1418 } 1419 } 1420 1421 return newsk; 1422 1423 out_overflow: 1424 __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); 1425 out_nonewsk: 1426 dst_release(dst); 1427 out: 1428 tcp_listendrop(sk); 1429 return NULL; 1430 } 1431 1432 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *, 1433 u32)); 1434 /* The socket must have it's spinlock held when we get 1435 * here, unless it is a TCP_LISTEN socket. 
1436 * 1437 * We have a potential double-lock case here, so even when 1438 * doing backlog processing we use the BH locking scheme. 1439 * This is because we cannot sleep with the original spinlock 1440 * held. 1441 */ 1442 INDIRECT_CALLABLE_SCOPE 1443 int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) 1444 { 1445 struct ipv6_pinfo *np = tcp_inet6_sk(sk); 1446 struct sk_buff *opt_skb = NULL; 1447 enum skb_drop_reason reason; 1448 struct tcp_sock *tp; 1449 1450 /* Imagine: socket is IPv6. IPv4 packet arrives, 1451 goes to IPv4 receive handler and backlogged. 1452 From backlog it always goes here. Kerboom... 1453 Fortunately, tcp_rcv_established and rcv_established 1454 handle them correctly, but it is not case with 1455 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK 1456 */ 1457 1458 if (skb->protocol == htons(ETH_P_IP)) 1459 return tcp_v4_do_rcv(sk, skb); 1460 1461 /* 1462 * socket locking is here for SMP purposes as backlog rcv 1463 * is currently called with bh processing disabled. 1464 */ 1465 1466 /* Do Stevens' IPV6_PKTOPTIONS. 1467 1468 Yes, guys, it is the only place in our code, where we 1469 may make it not affecting IPv4. 1470 The rest of code is protocol independent, 1471 and I do not like idea to uglify IPv4. 1472 1473 Actually, all the idea behind IPV6_PKTOPTIONS 1474 looks not very well thought. For now we latch 1475 options, received in the last packet, enqueued 1476 by tcp. Feel free to propose better solution. 1477 --ANK (980728) 1478 */ 1479 if (np->rxopt.all) 1480 opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC)); 1481 1482 reason = SKB_DROP_REASON_NOT_SPECIFIED; 1483 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ 1484 struct dst_entry *dst; 1485 1486 dst = rcu_dereference_protected(sk->sk_rx_dst, 1487 lockdep_sock_is_held(sk)); 1488 1489 sock_rps_save_rxhash(sk, skb); 1490 sk_mark_napi_id(sk, skb); 1491 if (dst) { 1492 if (sk->sk_rx_dst_ifindex != skb->skb_iif || 1493 INDIRECT_CALL_1(dst->ops->check, ip6_dst_check, 1494 dst, sk->sk_rx_dst_cookie) == NULL) { 1495 RCU_INIT_POINTER(sk->sk_rx_dst, NULL); 1496 dst_release(dst); 1497 } 1498 } 1499 1500 tcp_rcv_established(sk, skb); 1501 if (opt_skb) 1502 goto ipv6_pktoptions; 1503 return 0; 1504 } 1505 1506 if (tcp_checksum_complete(skb)) 1507 goto csum_err; 1508 1509 if (sk->sk_state == TCP_LISTEN) { 1510 struct sock *nsk = tcp_v6_cookie_check(sk, skb); 1511 1512 if (!nsk) 1513 goto discard; 1514 1515 if (nsk != sk) { 1516 if (tcp_child_process(sk, nsk, skb)) 1517 goto reset; 1518 if (opt_skb) 1519 __kfree_skb(opt_skb); 1520 return 0; 1521 } 1522 } else 1523 sock_rps_save_rxhash(sk, skb); 1524 1525 if (tcp_rcv_state_process(sk, skb)) 1526 goto reset; 1527 if (opt_skb) 1528 goto ipv6_pktoptions; 1529 return 0; 1530 1531 reset: 1532 tcp_v6_send_reset(sk, skb); 1533 discard: 1534 if (opt_skb) 1535 __kfree_skb(opt_skb); 1536 kfree_skb_reason(skb, reason); 1537 return 0; 1538 csum_err: 1539 reason = SKB_DROP_REASON_TCP_CSUM; 1540 trace_tcp_bad_csum(skb); 1541 TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS); 1542 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); 1543 goto discard; 1544 1545 1546 ipv6_pktoptions: 1547 /* Do you ask, what is it? 1548 1549 1. skb was enqueued by tcp. 1550 2. skb is added to tail of read queue, rather than out of order. 1551 3. socket is not in passive state. 1552 4. Finally, it really contains options, which user wants to receive. 
1553 */ 1554 tp = tcp_sk(sk); 1555 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt && 1556 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) { 1557 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo) 1558 np->mcast_oif = tcp_v6_iif(opt_skb); 1559 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) 1560 np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit; 1561 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass) 1562 np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb)); 1563 if (np->repflow) 1564 np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb)); 1565 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) { 1566 skb_set_owner_r(opt_skb, sk); 1567 tcp_v6_restore_cb(opt_skb); 1568 opt_skb = xchg(&np->pktoptions, opt_skb); 1569 } else { 1570 __kfree_skb(opt_skb); 1571 opt_skb = xchg(&np->pktoptions, NULL); 1572 } 1573 } 1574 1575 consume_skb(opt_skb); 1576 return 0; 1577 } 1578 1579 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr, 1580 const struct tcphdr *th) 1581 { 1582 /* This is tricky: we move IP6CB at its correct location into 1583 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because 1584 * _decode_session6() uses IP6CB(). 1585 * barrier() makes sure compiler won't play aliasing games. 1586 */ 1587 memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb), 1588 sizeof(struct inet6_skb_parm)); 1589 barrier(); 1590 1591 TCP_SKB_CB(skb)->seq = ntohl(th->seq); 1592 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + 1593 skb->len - th->doff*4); 1594 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); 1595 TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th); 1596 TCP_SKB_CB(skb)->tcp_tw_isn = 0; 1597 TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr); 1598 TCP_SKB_CB(skb)->sacked = 0; 1599 TCP_SKB_CB(skb)->has_rxtstamp = 1600 skb->tstamp || skb_hwtstamps(skb)->hwtstamp; 1601 } 1602 1603 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) 1604 { 1605 enum skb_drop_reason drop_reason; 1606 int sdif = inet6_sdif(skb); 1607 int dif = inet6_iif(skb); 1608 const struct tcphdr *th; 1609 const struct ipv6hdr *hdr; 1610 bool refcounted; 1611 struct sock *sk; 1612 int ret; 1613 struct net *net = dev_net(skb->dev); 1614 1615 drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; 1616 if (skb->pkt_type != PACKET_HOST) 1617 goto discard_it; 1618 1619 /* 1620 * Count it even if it's bad. 
1621 */ 1622 __TCP_INC_STATS(net, TCP_MIB_INSEGS); 1623 1624 if (!pskb_may_pull(skb, sizeof(struct tcphdr))) 1625 goto discard_it; 1626 1627 th = (const struct tcphdr *)skb->data; 1628 1629 if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) { 1630 drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL; 1631 goto bad_packet; 1632 } 1633 if (!pskb_may_pull(skb, th->doff*4)) 1634 goto discard_it; 1635 1636 if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo)) 1637 goto csum_error; 1638 1639 th = (const struct tcphdr *)skb->data; 1640 hdr = ipv6_hdr(skb); 1641 1642 lookup: 1643 sk = __inet6_lookup_skb(net->ipv4.tcp_death_row.hashinfo, skb, __tcp_hdrlen(th), 1644 th->source, th->dest, inet6_iif(skb), sdif, 1645 &refcounted); 1646 if (!sk) 1647 goto no_tcp_socket; 1648 1649 process: 1650 if (sk->sk_state == TCP_TIME_WAIT) 1651 goto do_time_wait; 1652 1653 if (sk->sk_state == TCP_NEW_SYN_RECV) { 1654 struct request_sock *req = inet_reqsk(sk); 1655 bool req_stolen = false; 1656 struct sock *nsk; 1657 1658 sk = req->rsk_listener; 1659 drop_reason = tcp_inbound_md5_hash(sk, skb, 1660 &hdr->saddr, &hdr->daddr, 1661 AF_INET6, dif, sdif); 1662 if (drop_reason) { 1663 sk_drops_add(sk, skb); 1664 reqsk_put(req); 1665 goto discard_it; 1666 } 1667 if (tcp_checksum_complete(skb)) { 1668 reqsk_put(req); 1669 goto csum_error; 1670 } 1671 if (unlikely(sk->sk_state != TCP_LISTEN)) { 1672 nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb); 1673 if (!nsk) { 1674 inet_csk_reqsk_queue_drop_and_put(sk, req); 1675 goto lookup; 1676 } 1677 sk = nsk; 1678 /* reuseport_migrate_sock() has already held one sk_refcnt 1679 * before returning. 1680 */ 1681 } else { 1682 sock_hold(sk); 1683 } 1684 refcounted = true; 1685 nsk = NULL; 1686 if (!tcp_filter(sk, skb)) { 1687 th = (const struct tcphdr *)skb->data; 1688 hdr = ipv6_hdr(skb); 1689 tcp_v6_fill_cb(skb, hdr, th); 1690 nsk = tcp_check_req(sk, skb, req, false, &req_stolen); 1691 } else { 1692 drop_reason = SKB_DROP_REASON_SOCKET_FILTER; 1693 } 1694 if (!nsk) { 1695 reqsk_put(req); 1696 if (req_stolen) { 1697 /* Another cpu got exclusive access to req 1698 * and created a full blown socket. 1699 * Try to feed this packet to this socket 1700 * instead of discarding it. 
1701 */ 1702 tcp_v6_restore_cb(skb); 1703 sock_put(sk); 1704 goto lookup; 1705 } 1706 goto discard_and_relse; 1707 } 1708 if (nsk == sk) { 1709 reqsk_put(req); 1710 tcp_v6_restore_cb(skb); 1711 } else if (tcp_child_process(sk, nsk, skb)) { 1712 tcp_v6_send_reset(nsk, skb); 1713 goto discard_and_relse; 1714 } else { 1715 sock_put(sk); 1716 return 0; 1717 } 1718 } 1719 1720 if (static_branch_unlikely(&ip6_min_hopcount)) { 1721 /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */ 1722 if (hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) { 1723 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); 1724 goto discard_and_relse; 1725 } 1726 } 1727 1728 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) { 1729 drop_reason = SKB_DROP_REASON_XFRM_POLICY; 1730 goto discard_and_relse; 1731 } 1732 1733 drop_reason = tcp_inbound_md5_hash(sk, skb, &hdr->saddr, &hdr->daddr, 1734 AF_INET6, dif, sdif); 1735 if (drop_reason) 1736 goto discard_and_relse; 1737 1738 if (tcp_filter(sk, skb)) { 1739 drop_reason = SKB_DROP_REASON_SOCKET_FILTER; 1740 goto discard_and_relse; 1741 } 1742 th = (const struct tcphdr *)skb->data; 1743 hdr = ipv6_hdr(skb); 1744 tcp_v6_fill_cb(skb, hdr, th); 1745 1746 skb->dev = NULL; 1747 1748 if (sk->sk_state == TCP_LISTEN) { 1749 ret = tcp_v6_do_rcv(sk, skb); 1750 goto put_and_return; 1751 } 1752 1753 sk_incoming_cpu_update(sk); 1754 1755 bh_lock_sock_nested(sk); 1756 tcp_segs_in(tcp_sk(sk), skb); 1757 ret = 0; 1758 if (!sock_owned_by_user(sk)) { 1759 ret = tcp_v6_do_rcv(sk, skb); 1760 } else { 1761 if (tcp_add_backlog(sk, skb, &drop_reason)) 1762 goto discard_and_relse; 1763 } 1764 bh_unlock_sock(sk); 1765 put_and_return: 1766 if (refcounted) 1767 sock_put(sk); 1768 return ret ? -1 : 0; 1769 1770 no_tcp_socket: 1771 drop_reason = SKB_DROP_REASON_NO_SOCKET; 1772 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) 1773 goto discard_it; 1774 1775 tcp_v6_fill_cb(skb, hdr, th); 1776 1777 if (tcp_checksum_complete(skb)) { 1778 csum_error: 1779 drop_reason = SKB_DROP_REASON_TCP_CSUM; 1780 trace_tcp_bad_csum(skb); 1781 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS); 1782 bad_packet: 1783 __TCP_INC_STATS(net, TCP_MIB_INERRS); 1784 } else { 1785 tcp_v6_send_reset(NULL, skb); 1786 } 1787 1788 discard_it: 1789 SKB_DR_OR(drop_reason, NOT_SPECIFIED); 1790 kfree_skb_reason(skb, drop_reason); 1791 return 0; 1792 1793 discard_and_relse: 1794 sk_drops_add(sk, skb); 1795 if (refcounted) 1796 sock_put(sk); 1797 goto discard_it; 1798 1799 do_time_wait: 1800 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { 1801 drop_reason = SKB_DROP_REASON_XFRM_POLICY; 1802 inet_twsk_put(inet_twsk(sk)); 1803 goto discard_it; 1804 } 1805 1806 tcp_v6_fill_cb(skb, hdr, th); 1807 1808 if (tcp_checksum_complete(skb)) { 1809 inet_twsk_put(inet_twsk(sk)); 1810 goto csum_error; 1811 } 1812 1813 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { 1814 case TCP_TW_SYN: 1815 { 1816 struct sock *sk2; 1817 1818 sk2 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo, 1819 skb, __tcp_hdrlen(th), 1820 &ipv6_hdr(skb)->saddr, th->source, 1821 &ipv6_hdr(skb)->daddr, 1822 ntohs(th->dest), 1823 tcp_v6_iif_l3_slave(skb), 1824 sdif); 1825 if (sk2) { 1826 struct inet_timewait_sock *tw = inet_twsk(sk); 1827 inet_twsk_deschedule_put(tw); 1828 sk = sk2; 1829 tcp_v6_restore_cb(skb); 1830 refcounted = false; 1831 goto process; 1832 } 1833 } 1834 /* to ACK */ 1835 fallthrough; 1836 case TCP_TW_ACK: 1837 tcp_v6_timewait_ack(sk, skb); 1838 break; 1839 case TCP_TW_RST: 1840 tcp_v6_send_reset(sk, skb); 1841 
inet_twsk_deschedule_put(inet_twsk(sk)); 1842 goto discard_it; 1843 case TCP_TW_SUCCESS: 1844 ; 1845 } 1846 goto discard_it; 1847 } 1848 1849 void tcp_v6_early_demux(struct sk_buff *skb) 1850 { 1851 struct net *net = dev_net(skb->dev); 1852 const struct ipv6hdr *hdr; 1853 const struct tcphdr *th; 1854 struct sock *sk; 1855 1856 if (skb->pkt_type != PACKET_HOST) 1857 return; 1858 1859 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr))) 1860 return; 1861 1862 hdr = ipv6_hdr(skb); 1863 th = tcp_hdr(skb); 1864 1865 if (th->doff < sizeof(struct tcphdr) / 4) 1866 return; 1867 1868 /* Note : We use inet6_iif() here, not tcp_v6_iif() */ 1869 sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, 1870 &hdr->saddr, th->source, 1871 &hdr->daddr, ntohs(th->dest), 1872 inet6_iif(skb), inet6_sdif(skb)); 1873 if (sk) { 1874 skb->sk = sk; 1875 skb->destructor = sock_edemux; 1876 if (sk_fullsock(sk)) { 1877 struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst); 1878 1879 if (dst) 1880 dst = dst_check(dst, sk->sk_rx_dst_cookie); 1881 if (dst && 1882 sk->sk_rx_dst_ifindex == skb->skb_iif) 1883 skb_dst_set_noref(skb, dst); 1884 } 1885 } 1886 } 1887 1888 static struct timewait_sock_ops tcp6_timewait_sock_ops = { 1889 .twsk_obj_size = sizeof(struct tcp6_timewait_sock), 1890 .twsk_unique = tcp_twsk_unique, 1891 .twsk_destructor = tcp_twsk_destructor, 1892 }; 1893 1894 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb) 1895 { 1896 __tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr); 1897 } 1898 1899 const struct inet_connection_sock_af_ops ipv6_specific = { 1900 .queue_xmit = inet6_csk_xmit, 1901 .send_check = tcp_v6_send_check, 1902 .rebuild_header = inet6_sk_rebuild_header, 1903 .sk_rx_dst_set = inet6_sk_rx_dst_set, 1904 .conn_request = tcp_v6_conn_request, 1905 .syn_recv_sock = tcp_v6_syn_recv_sock, 1906 .net_header_len = sizeof(struct ipv6hdr), 1907 .net_frag_header_len = sizeof(struct frag_hdr), 1908 .setsockopt = ipv6_setsockopt, 1909 .getsockopt = ipv6_getsockopt, 1910 .addr2sockaddr = inet6_csk_addr2sockaddr, 1911 .sockaddr_len = sizeof(struct sockaddr_in6), 1912 .mtu_reduced = tcp_v6_mtu_reduced, 1913 }; 1914 1915 #ifdef CONFIG_TCP_MD5SIG 1916 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = { 1917 .md5_lookup = tcp_v6_md5_lookup, 1918 .calc_md5_hash = tcp_v6_md5_hash_skb, 1919 .md5_parse = tcp_v6_parse_md5_keys, 1920 }; 1921 #endif 1922 1923 /* 1924 * TCP over IPv4 via INET6 API 1925 */ 1926 static const struct inet_connection_sock_af_ops ipv6_mapped = { 1927 .queue_xmit = ip_queue_xmit, 1928 .send_check = tcp_v4_send_check, 1929 .rebuild_header = inet_sk_rebuild_header, 1930 .sk_rx_dst_set = inet_sk_rx_dst_set, 1931 .conn_request = tcp_v6_conn_request, 1932 .syn_recv_sock = tcp_v6_syn_recv_sock, 1933 .net_header_len = sizeof(struct iphdr), 1934 .setsockopt = ipv6_setsockopt, 1935 .getsockopt = ipv6_getsockopt, 1936 .addr2sockaddr = inet6_csk_addr2sockaddr, 1937 .sockaddr_len = sizeof(struct sockaddr_in6), 1938 .mtu_reduced = tcp_v4_mtu_reduced, 1939 }; 1940 1941 #ifdef CONFIG_TCP_MD5SIG 1942 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = { 1943 .md5_lookup = tcp_v4_md5_lookup, 1944 .calc_md5_hash = tcp_v4_md5_hash_skb, 1945 .md5_parse = tcp_v6_parse_md5_keys, 1946 }; 1947 #endif 1948 1949 /* NOTE: A lot of things set to zero explicitly by call to 1950 * sk_alloc() so need not be done here. 
1951 */ 1952 static int tcp_v6_init_sock(struct sock *sk) 1953 { 1954 struct inet_connection_sock *icsk = inet_csk(sk); 1955 1956 tcp_init_sock(sk); 1957 1958 icsk->icsk_af_ops = &ipv6_specific; 1959 1960 #ifdef CONFIG_TCP_MD5SIG 1961 tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific; 1962 #endif 1963 1964 return 0; 1965 } 1966 1967 static void tcp_v6_destroy_sock(struct sock *sk) 1968 { 1969 tcp_v4_destroy_sock(sk); 1970 inet6_destroy_sock(sk); 1971 } 1972 1973 #ifdef CONFIG_PROC_FS 1974 /* Proc filesystem TCPv6 sock list dumping. */ 1975 static void get_openreq6(struct seq_file *seq, 1976 const struct request_sock *req, int i) 1977 { 1978 long ttd = req->rsk_timer.expires - jiffies; 1979 const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr; 1980 const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr; 1981 1982 if (ttd < 0) 1983 ttd = 0; 1984 1985 seq_printf(seq, 1986 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " 1987 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n", 1988 i, 1989 src->s6_addr32[0], src->s6_addr32[1], 1990 src->s6_addr32[2], src->s6_addr32[3], 1991 inet_rsk(req)->ir_num, 1992 dest->s6_addr32[0], dest->s6_addr32[1], 1993 dest->s6_addr32[2], dest->s6_addr32[3], 1994 ntohs(inet_rsk(req)->ir_rmt_port), 1995 TCP_SYN_RECV, 1996 0, 0, /* could print option size, but that is af dependent. */ 1997 1, /* timers active (only the expire timer) */ 1998 jiffies_to_clock_t(ttd), 1999 req->num_timeout, 2000 from_kuid_munged(seq_user_ns(seq), 2001 sock_i_uid(req->rsk_listener)), 2002 0, /* non standard timer */ 2003 0, /* open_requests have no inode */ 2004 0, req); 2005 } 2006 2007 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) 2008 { 2009 const struct in6_addr *dest, *src; 2010 __u16 destp, srcp; 2011 int timer_active; 2012 unsigned long timer_expires; 2013 const struct inet_sock *inet = inet_sk(sp); 2014 const struct tcp_sock *tp = tcp_sk(sp); 2015 const struct inet_connection_sock *icsk = inet_csk(sp); 2016 const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq; 2017 int rx_queue; 2018 int state; 2019 2020 dest = &sp->sk_v6_daddr; 2021 src = &sp->sk_v6_rcv_saddr; 2022 destp = ntohs(inet->inet_dport); 2023 srcp = ntohs(inet->inet_sport); 2024 2025 if (icsk->icsk_pending == ICSK_TIME_RETRANS || 2026 icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT || 2027 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { 2028 timer_active = 1; 2029 timer_expires = icsk->icsk_timeout; 2030 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { 2031 timer_active = 4; 2032 timer_expires = icsk->icsk_timeout; 2033 } else if (timer_pending(&sp->sk_timer)) { 2034 timer_active = 2; 2035 timer_expires = sp->sk_timer.expires; 2036 } else { 2037 timer_active = 0; 2038 timer_expires = jiffies; 2039 } 2040 2041 state = inet_sk_state_load(sp); 2042 if (state == TCP_LISTEN) 2043 rx_queue = READ_ONCE(sp->sk_ack_backlog); 2044 else 2045 /* Because we don't lock the socket, 2046 * we might find a transient negative value. 

static void get_timewait6_sock(struct seq_file *seq,
			       struct inet_timewait_sock *tw, int i)
{
	long delta = tw->tw_timer.expires - jiffies;
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;

	dest = &tw->tw_v6_daddr;
	src = &tw->tw_v6_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp = ntohs(tw->tw_sport);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   tw->tw_substate, 0, 0,
		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
		   refcount_read(&tw->tw_refcnt), tw);
}

static int tcp6_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
	struct sock *sk = v;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "  sl  "
			 "local_address                         "
			 "remote_address                        "
			 "st tx_queue rx_queue tr tm->when retrnsmt"
			 "   uid  timeout inode\n");
		goto out;
	}
	st = seq->private;

	if (sk->sk_state == TCP_TIME_WAIT)
		get_timewait6_sock(seq, v, st->num);
	else if (sk->sk_state == TCP_NEW_SYN_RECV)
		get_openreq6(seq, v, st->num);
	else
		get_tcp6_sock(seq, v, st->num);
out:
	return 0;
}

static const struct seq_operations tcp6_seq_ops = {
	.show = tcp6_seq_show,
	.start = tcp_seq_start,
	.next = tcp_seq_next,
	.stop = tcp_seq_stop,
};

static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.family = AF_INET6,
};

int __net_init tcp6_proc_init(struct net *net)
{
	if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
				  sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
		return -ENOMEM;
	return 0;
}

void tcp6_proc_exit(struct net *net)
{
	remove_proc_entry("tcp6", net->proc_net);
}
#endif
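
/* Illustrative usage sketch (an assumption about the typical caller, not
 * code from this file): a userspace
 *
 *	int fd = socket(AF_INET6, SOCK_STREAM, IPPROTO_TCP);
 *
 * is matched against the tcpv6_protosw entry registered below, so the new
 * socket gets &tcpv6_prot as its protocol ops and &inet6_stream_ops as its
 * socket-level ops; connect(), sendmsg() and friends then land in the
 * handlers listed in the proto table that follows.
 */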

struct proto tcpv6_prot = {
	.name = "TCPv6",
	.owner = THIS_MODULE,
	.close = tcp_close,
	.pre_connect = tcp_v6_pre_connect,
	.connect = tcp_v6_connect,
	.disconnect = tcp_disconnect,
	.accept = inet_csk_accept,
	.ioctl = tcp_ioctl,
	.init = tcp_v6_init_sock,
	.destroy = tcp_v6_destroy_sock,
	.shutdown = tcp_shutdown,
	.setsockopt = tcp_setsockopt,
	.getsockopt = tcp_getsockopt,
	.bpf_bypass_getsockopt = tcp_bpf_bypass_getsockopt,
	.keepalive = tcp_set_keepalive,
	.recvmsg = tcp_recvmsg,
	.sendmsg = tcp_sendmsg,
	.sendpage = tcp_sendpage,
	.backlog_rcv = tcp_v6_do_rcv,
	.release_cb = tcp_release_cb,
	.hash = inet6_hash,
	.unhash = inet_unhash,
	.get_port = inet_csk_get_port,
	.put_port = inet_put_port,
#ifdef CONFIG_BPF_SYSCALL
	.psock_update_sk_prot = tcp_bpf_update_proto,
#endif
	.enter_memory_pressure = tcp_enter_memory_pressure,
	.leave_memory_pressure = tcp_leave_memory_pressure,
	.stream_memory_free = tcp_stream_memory_free,
	.sockets_allocated = &tcp_sockets_allocated,

	.memory_allocated = &tcp_memory_allocated,
	.per_cpu_fw_alloc = &tcp_memory_per_cpu_fw_alloc,

	.memory_pressure = &tcp_memory_pressure,
	.orphan_count = &tcp_orphan_count,
	.sysctl_mem = sysctl_tcp_mem,
	.sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem),
	.sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem),
	.max_header = MAX_TCP_HEADER,
	.obj_size = sizeof(struct tcp6_sock),
	.slab_flags = SLAB_TYPESAFE_BY_RCU,
	.twsk_prot = &tcp6_timewait_sock_ops,
	.rsk_prot = &tcp6_request_sock_ops,
	.h.hashinfo = NULL,
	.no_autobind = true,
	.diag_destroy = tcp_abort,
};
EXPORT_SYMBOL_GPL(tcpv6_prot);

static const struct inet6_protocol tcpv6_protocol = {
	.handler = tcp_v6_rcv,
	.err_handler = tcp_v6_err,
	.flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};

static struct inet_protosw tcpv6_protosw = {
	.type = SOCK_STREAM,
	.protocol = IPPROTO_TCP,
	.prot = &tcpv6_prot,
	.ops = &inet6_stream_ops,
	.flags = INET_PROTOSW_PERMANENT |
		 INET_PROTOSW_ICSK,
};

static int __net_init tcpv6_net_init(struct net *net)
{
	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
				    SOCK_RAW, IPPROTO_TCP, net);
}

static void __net_exit tcpv6_net_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
}

static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
{
	tcp_twsk_purge(net_exit_list, AF_INET6);
}

static struct pernet_operations tcpv6_net_ops = {
	.init = tcpv6_net_init,
	.exit = tcpv6_net_exit,
	.exit_batch = tcpv6_net_exit_batch,
};

int __init tcpv6_init(void)
{
	int ret;

	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
	if (ret)
		goto out;

	/* register inet6 protocol */
	ret = inet6_register_protosw(&tcpv6_protosw);
	if (ret)
		goto out_tcpv6_protocol;

	ret = register_pernet_subsys(&tcpv6_net_ops);
	if (ret)
		goto out_tcpv6_protosw;

	ret = mptcpv6_init();
	if (ret)
		goto out_tcpv6_pernet_subsys;

out:
	return ret;

out_tcpv6_pernet_subsys:
	unregister_pernet_subsys(&tcpv6_net_ops);
out_tcpv6_protosw:
	inet6_unregister_protosw(&tcpv6_protosw);
out_tcpv6_protocol:
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
	goto out;
}

void tcpv6_exit(void)
{
	unregister_pernet_subsys(&tcpv6_net_ops);
	inet6_unregister_protosw(&tcpv6_protosw);
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
}