// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	TCP over IPv6
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on:
 *	linux/net/ipv4/tcp.c
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allows both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 */

#include <linux/bottom_half.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
#include <linux/indirect_call_wrapper.h>

#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/inet6_connection_sock.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/snmp.h>
#include <net/dsfield.h>
#include <net/timewait_sock.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>
#include <net/busy_poll.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <crypto/hash.h>
#include <linux/scatterlist.h>

#include <trace/events/tcp.h>

static void	tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
static void	tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				      struct request_sock *req);

INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);

static const struct inet_connection_sock_af_ops ipv6_mapped;
const struct inet_connection_sock_af_ops ipv6_specific;
#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
#else
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
						   const struct in6_addr *addr,
						   int l3index)
{
	return NULL;
}
#endif

/* Helper returning the inet6 address from a given tcp socket.
 * It can be used in the TCP stack instead of inet6_sk(sk).
 * This avoids a dereference and allows compiler optimizations.
 * It is a specialized version of inet6_sk_generic().
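 * It relies on struct ipv6_pinfo being laid out at the very end of
 * struct tcp6_sock, so the offset below is a compile-time constant.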
 */
static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
{
	unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);

	return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
}

static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	if (dst && dst_hold_safe(dst)) {
		const struct rt6_info *rt = (const struct rt6_info *)dst;

		rcu_assign_pointer(sk->sk_rx_dst, dst);
		sk->sk_rx_dst_ifindex = skb->skb_iif;
		sk->sk_rx_dst_cookie = rt6_get_cookie(rt);
	}
}

static u32 tcp_v6_init_seq(const struct sk_buff *skb)
{
	return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
				ipv6_hdr(skb)->saddr.s6_addr32,
				tcp_hdr(skb)->dest,
				tcp_hdr(skb)->source);
}

static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
{
	return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
				   ipv6_hdr(skb)->saddr.s6_addr32);
}

static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
			      int addr_len)
{
	/* This check is replicated from tcp_v6_connect() and intended to
	 * prevent the BPF program called below from accessing bytes that are
	 * out of the bounds specified by the user in addr_len.
	 */
	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	sock_owned_by_me(sk);

	return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
}

static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
			  int addr_len)
{
	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct in6_addr *saddr = NULL, *final_p, final;
	struct inet_timewait_death_row *tcp_death_row;
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6_txoptions *opt;
	struct dst_entry *dst;
	struct flowi6 fl6;
	int addr_type;
	int err;

	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	if (usin->sin6_family != AF_INET6)
		return -EAFNOSUPPORT;

	memset(&fl6, 0, sizeof(fl6));

	if (np->sndflow) {
		fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
		IP6_ECN_flow_init(fl6.flowlabel);
		if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
			struct ip6_flowlabel *flowlabel;
			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
			if (IS_ERR(flowlabel))
				return -EINVAL;
			fl6_sock_release(flowlabel);
		}
	}

	/*
	 *	connect() to INADDR_ANY means loopback (BSD'ism).
	 */

	if (ipv6_addr_any(&usin->sin6_addr)) {
		if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
			ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
					       &usin->sin6_addr);
		else
			usin->sin6_addr = in6addr_loopback;
	}

	addr_type = ipv6_addr_type(&usin->sin6_addr);

	if (addr_type & IPV6_ADDR_MULTICAST)
		return -ENETUNREACH;

	if (addr_type&IPV6_ADDR_LINKLOCAL) {
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			/* If interface is set while binding, indices
			 * must coincide.
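			 * (sk_dev_equal_l3scope() also accepts the case where
			 * the socket is bound to the L3 master (VRF) device of
			 * the interface named by sin6_scope_id.)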
			 */
			if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
				return -EINVAL;

			sk->sk_bound_dev_if = usin->sin6_scope_id;
		}

		/* Connect to link-local address requires an interface */
		if (!sk->sk_bound_dev_if)
			return -EINVAL;
	}

	if (tp->rx_opt.ts_recent_stamp &&
	    !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		WRITE_ONCE(tp->write_seq, 0);
	}

	sk->sk_v6_daddr = usin->sin6_addr;
	np->flow_label = fl6.flowlabel;

	/*
	 *	TCP over IPv4
	 */

	if (addr_type & IPV6_ADDR_MAPPED) {
		u32 exthdrlen = icsk->icsk_ext_hdr_len;
		struct sockaddr_in sin;

		if (ipv6_only_sock(sk))
			return -ENETUNREACH;

		sin.sin_family = AF_INET;
		sin.sin_port = usin->sin6_port;
		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

		icsk->icsk_af_ops = &ipv6_mapped;
		if (sk_is_mptcp(sk))
			mptcpv6_handle_mapped(sk, true);
		sk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

		if (err) {
			icsk->icsk_ext_hdr_len = exthdrlen;
			icsk->icsk_af_ops = &ipv6_specific;
			if (sk_is_mptcp(sk))
				mptcpv6_handle_mapped(sk, false);
			sk->sk_backlog_rcv = tcp_v6_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
			tp->af_specific = &tcp_sock_ipv6_specific;
#endif
			goto failure;
		}
		np->saddr = sk->sk_v6_rcv_saddr;

		return err;
	}

	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
		saddr = &sk->sk_v6_rcv_saddr;

	fl6.flowi6_proto = IPPROTO_TCP;
	fl6.daddr = sk->sk_v6_daddr;
	fl6.saddr = saddr ? *saddr : np->saddr;
	fl6.flowi6_oif = sk->sk_bound_dev_if;
	fl6.flowi6_mark = sk->sk_mark;
	fl6.fl6_dport = usin->sin6_port;
	fl6.fl6_sport = inet->inet_sport;
	fl6.flowi6_uid = sk->sk_uid;

	opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
	final_p = fl6_update_dst(&fl6, opt, &final);

	security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));

	dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p);
	if (IS_ERR(dst)) {
		err = PTR_ERR(dst);
		goto failure;
	}

	tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;

	if (!saddr) {
		struct inet_bind_hashbucket *prev_addr_hashbucket = NULL;
		struct in6_addr prev_v6_rcv_saddr;

		if (icsk->icsk_bind2_hash) {
			prev_addr_hashbucket = inet_bhashfn_portaddr(tcp_death_row->hashinfo,
								     sk, net, inet->inet_num);
			prev_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
		}
		saddr = &fl6.saddr;
		sk->sk_v6_rcv_saddr = *saddr;

		if (prev_addr_hashbucket) {
			err = inet_bhash2_update_saddr(prev_addr_hashbucket, sk);
			if (err) {
				sk->sk_v6_rcv_saddr = prev_v6_rcv_saddr;
				goto failure;
			}
		}
	}

	/* set the source address */
	np->saddr = *saddr;
	inet->inet_rcv_saddr = LOOPBACK4_IPV6;

	sk->sk_gso_type = SKB_GSO_TCPV6;
	ip6_dst_store(sk, dst, NULL, NULL);

	icsk->icsk_ext_hdr_len = 0;
	if (opt)
		icsk->icsk_ext_hdr_len = opt->opt_flen +
					 opt->opt_nflen;

	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

	inet->inet_dport = usin->sin6_port;

	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet6_hash_connect(tcp_death_row, sk);
	if (err)
		goto late_failure;

	sk_set_txhash(sk);

	if (likely(!tp->repair)) {
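		/* Not repairing a socket: derive the initial sequence number
		 * and timestamp offset from the connection 4-tuple.
		 */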
		if (!tp->write_seq)
			WRITE_ONCE(tp->write_seq,
				   secure_tcpv6_seq(np->saddr.s6_addr32,
						    sk->sk_v6_daddr.s6_addr32,
						    inet->inet_sport,
						    inet->inet_dport));
		tp->tsoffset = secure_tcpv6_ts_off(net, np->saddr.s6_addr32,
						   sk->sk_v6_daddr.s6_addr32);
	}

	if (tcp_fastopen_defer_connect(sk, &err))
		return err;
	if (err)
		goto late_failure;

	err = tcp_connect(sk);
	if (err)
		goto late_failure;

	return 0;

late_failure:
	tcp_set_state(sk, TCP_CLOSE);
failure:
	inet->inet_dport = 0;
	sk->sk_route_caps = 0;
	return err;
}

static void tcp_v6_mtu_reduced(struct sock *sk)
{
	struct dst_entry *dst;
	u32 mtu;

	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
		return;

	mtu = READ_ONCE(tcp_sk(sk)->mtu_info);

	/* Drop requests trying to increase our current mss.
	 * Check done in __ip6_rt_update_pmtu() is too late.
	 */
	if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
		return;

	dst = inet6_csk_update_pmtu(sk, mtu);
	if (!dst)
		return;

	if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
		tcp_sync_mss(sk, dst_mtu(dst));
		tcp_simple_retransmit(sk);
	}
}

static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		      u8 type, u8 code, int offset, __be32 info)
{
	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
	struct net *net = dev_net(skb->dev);
	struct request_sock *fastopen;
	struct ipv6_pinfo *np;
	struct tcp_sock *tp;
	__u32 seq, snd_una;
	struct sock *sk;
	bool fatal;
	int err;

	sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
					&hdr->daddr, th->dest,
					&hdr->saddr, ntohs(th->source),
					skb->dev->ifindex, inet6_sdif(skb));

	if (!sk) {
		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
				  ICMP6_MIB_INERRORS);
		return -ENOENT;
	}

	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return 0;
	}
	seq = ntohl(th->seq);
	fatal = icmpv6_err_convert(type, code, &err);
	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		tcp_req_err(sk, seq, fatal);
		return 0;
	}

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (static_branch_unlikely(&ip6_min_hopcount)) {
		/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
		if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
			goto out;
		}
	}

	tp = tcp_sk(sk);
	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
	fastopen = rcu_dereference(tp->fastopen_rsk);
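	/* For a TFO server still in SYN-RECV, validate the ICMP sequence
	 * number against the request's ISN rather than tp->snd_una.
	 */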
	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, snd_una, tp->snd_nxt)) {
		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = tcp_inet6_sk(sk);

	if (type == NDISC_REDIRECT) {
		if (!sock_owned_by_user(sk)) {
			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);

			if (dst)
				dst->ops->redirect(dst, sk, skb);
		}
		goto out;
	}

	if (type == ICMPV6_PKT_TOOBIG) {
		u32 mtu = ntohl(info);

		/* We are not interested in TCP_LISTEN and open_requests
		 * (SYN-ACKs sent out by Linux are always < 576 bytes, so
		 * they should go through unfragmented).
		 */
		if (sk->sk_state == TCP_LISTEN)
			goto out;

		if (!ip6_sk_accept_pmtu(sk))
			goto out;

		if (mtu < IPV6_MIN_MTU)
			goto out;

		WRITE_ONCE(tp->mtu_info, mtu);

		if (!sock_owned_by_user(sk))
			tcp_v6_mtu_reduced(sk);
		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
					   &sk->sk_tsq_flags))
			sock_hold(sk);
		goto out;
	}


	/* Might be for a request_sock */
	switch (sk->sk_state) {
	case TCP_SYN_SENT:
	case TCP_SYN_RECV:
		/* Only in fast or simultaneous open. If a fast open socket is
		 * already accepted it is treated as a connected one below.
		 */
		if (fastopen && !fastopen->sk)
			break;

		ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);

		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;
			sk_error_report(sk);	/* Wake people up to see the error (see connect in sock.c) */

			tcp_done(sk);
		} else
			sk->sk_err_soft = err;
		goto out;
	case TCP_LISTEN:
		break;
	default:
		/* check if this ICMP message allows revert of backoff.
		 * (see RFC 6069)
		 */
		if (!fastopen && type == ICMPV6_DEST_UNREACH &&
		    code == ICMPV6_NOROUTE)
			tcp_ld_RTO_revert(sk, seq);
	}

	if (!sock_owned_by_user(sk) && np->recverr) {
		sk->sk_err = err;
		sk_error_report(sk);
	} else
		sk->sk_err_soft = err;

out:
	bh_unlock_sock(sk);
	sock_put(sk);
	return 0;
}


static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
			      struct flowi *fl,
			      struct request_sock *req,
			      struct tcp_fastopen_cookie *foc,
			      enum tcp_synack_type synack_type,
			      struct sk_buff *syn_skb)
{
	struct inet_request_sock *ireq = inet_rsk(req);
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct ipv6_txoptions *opt;
	struct flowi6 *fl6 = &fl->u.ip6;
	struct sk_buff *skb;
	int err = -ENOMEM;
	u8 tclass;

	/* First, grab a route. */
	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
					       IPPROTO_TCP)) == NULL)
		goto done;

	skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);

	if (skb) {
		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
				    &ireq->ir_v6_rmt_addr);

		fl6->daddr = ireq->ir_v6_rmt_addr;
		if (np->repflow && ireq->pktopts)
			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));

		tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
				(tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
				(np->tclass & INET_ECN_MASK) :
				np->tclass;

		if (!INET_ECN_is_capable(tclass) &&
		    tcp_bpf_ca_needs_ecn((struct sock *)req))
			tclass |= INET_ECN_ECT_0;

		rcu_read_lock();
		opt = ireq->ipv6_opt;
		if (!opt)
			opt = rcu_dereference(np->opt);
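		/* Prefer a mark already carried by the SYN-ACK skb; fall back
		 * to the listener's sk_mark when none is set.
		 */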
		err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
			       tclass, sk->sk_priority);
		rcu_read_unlock();
		err = net_xmit_eval(err);
	}

done:
	return err;
}


static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
	kfree(inet_rsk(req)->ipv6_opt);
	consume_skb(inet_rsk(req)->pktopts);
}

#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
						   const struct in6_addr *addr,
						   int l3index)
{
	return tcp_md5_do_lookup(sk, l3index,
				 (union tcp_md5_addr *)addr, AF_INET6);
}

static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
						const struct sock *addr_sk)
{
	int l3index;

	l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
						 addr_sk->sk_bound_dev_if);
	return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
				    l3index);
}

static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
				 sockptr_t optval, int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
	int l3index = 0;
	u8 prefixlen;
	u8 flags;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin6->sin6_family != AF_INET6)
		return -EINVAL;

	flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;

	if (optname == TCP_MD5SIG_EXT &&
	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
		prefixlen = cmd.tcpm_prefixlen;
		if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
					prefixlen > 32))
			return -EINVAL;
	} else {
		prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
	}

	if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex &&
	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
		struct net_device *dev;

		rcu_read_lock();
		dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
		if (dev && netif_is_l3_master(dev))
			l3index = dev->ifindex;
		rcu_read_unlock();

		/* ok to reference set/not set outside of rcu;
		 * right now device MUST be an L3 master
		 */
		if (!dev || !l3index)
			return -EINVAL;
	}

	if (!cmd.tcpm_keylen) {
		if (ipv6_addr_v4mapped(&sin6->sin6_addr))
			return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
					      AF_INET, prefixlen,
					      l3index, flags);
		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
				      AF_INET6, prefixlen, l3index, flags);
	}

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	if (ipv6_addr_v4mapped(&sin6->sin6_addr))
		return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
				      AF_INET, prefixlen, l3index, flags,
				      cmd.tcpm_key, cmd.tcpm_keylen,
				      GFP_KERNEL);

	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
			      AF_INET6, prefixlen, l3index, flags,
			      cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
}

static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
				   const struct in6_addr *daddr,
				   const struct in6_addr *saddr,
				   const struct tcphdr *th, int nbytes)
{
	struct tcp6_pseudohdr *bp;
	struct scatterlist sg;
	struct tcphdr *_th;

	bp = hp->scratch;
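	/* The TCP-MD5 digest (RFC 2385) covers an IPv6 pseudo-header, the TCP
	 * header with its checksum field zeroed, and then the payload.
	 */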
	/* 1. TCP pseudo-header (RFC2460) */
	bp->saddr = *saddr;
	bp->daddr = *daddr;
	bp->protocol = cpu_to_be32(IPPROTO_TCP);
	bp->len = cpu_to_be32(nbytes);

	_th = (struct tcphdr *)(bp + 1);
	memcpy(_th, th, sizeof(*th));
	_th->check = 0;

	sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
	ahash_request_set_crypt(hp->md5_req, &sg, NULL,
				sizeof(*bp) + sizeof(*th));
	return crypto_ahash_update(hp->md5_req);
}

static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       const struct in6_addr *daddr, struct in6_addr *saddr,
			       const struct tcphdr *th)
{
	struct tcp_md5sig_pool *hp;
	struct ahash_request *req;

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	req = hp->md5_req;

	if (crypto_ahash_init(req))
		goto clear_hash;
	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	ahash_request_set_crypt(req, NULL, md5_hash, 0);
	if (crypto_ahash_final(req))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

static int tcp_v6_md5_hash_skb(char *md5_hash,
			       const struct tcp_md5sig_key *key,
			       const struct sock *sk,
			       const struct sk_buff *skb)
{
	const struct in6_addr *saddr, *daddr;
	struct tcp_md5sig_pool *hp;
	struct ahash_request *req;
	const struct tcphdr *th = tcp_hdr(skb);

	if (sk) { /* valid for established and request sockets */
		saddr = &sk->sk_v6_rcv_saddr;
		daddr = &sk->sk_v6_daddr;
	} else {
		const struct ipv6hdr *ip6h = ipv6_hdr(skb);
		saddr = &ip6h->saddr;
		daddr = &ip6h->daddr;
	}

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	req = hp->md5_req;

	if (crypto_ahash_init(req))
		goto clear_hash;

	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
		goto clear_hash;
	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	ahash_request_set_crypt(req, NULL, md5_hash, 0);
	if (crypto_ahash_final(req))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

#endif

static void tcp_v6_init_req(struct request_sock *req,
			    const struct sock *sk_listener,
			    struct sk_buff *skb)
{
	bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
	struct inet_request_sock *ireq = inet_rsk(req);
	const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);

	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;

	/* So that link locals have meaning */
	if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
		ireq->ir_iif = tcp_v6_iif(skb);

	if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
	    (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
	     np->rxopt.bits.rxinfo ||
	     np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
	     np->rxopt.bits.rxohlim || np->repflow)) {
		refcount_inc(&skb->users);
		ireq->pktopts = skb;
	}
}

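/* Called from tcp_conn_request(): fill in the IPv6 parts of the request sock
 * and return a route for the SYN-ACK.
 */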
static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
					  struct sk_buff *skb,
					  struct flowi *fl,
					  struct request_sock *req)
{
	tcp_v6_init_req(req, sk, skb);

	if (security_inet_conn_request(sk, skb, req))
		return NULL;

	return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
}

struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
	.family		=	AF_INET6,
	.obj_size	=	sizeof(struct tcp6_request_sock),
	.rtx_syn_ack	=	tcp_rtx_synack,
	.send_ack	=	tcp_v6_reqsk_send_ack,
	.destructor	=	tcp_v6_reqsk_destructor,
	.send_reset	=	tcp_v6_send_reset,
	.syn_ack_timeout =	tcp_syn_ack_timeout,
};

const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
	.mss_clamp	=	IPV6_MIN_MTU - sizeof(struct tcphdr) -
				sizeof(struct ipv6hdr),
#ifdef CONFIG_TCP_MD5SIG
	.req_md5_lookup	=	tcp_v6_md5_lookup,
	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
#endif
#ifdef CONFIG_SYN_COOKIES
	.cookie_init_seq =	cookie_v6_init_sequence,
#endif
	.route_req	=	tcp_v6_route_req,
	.init_seq	=	tcp_v6_init_seq,
	.init_ts_off	=	tcp_v6_init_ts_off,
	.send_synack	=	tcp_v6_send_synack,
};

static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
				 u32 ack, u32 win, u32 tsval, u32 tsecr,
				 int oif, struct tcp_md5sig_key *key, int rst,
				 u8 tclass, __be32 label, u32 priority, u32 txhash)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct tcphdr *t1;
	struct sk_buff *buff;
	struct flowi6 fl6;
	struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
	struct sock *ctl_sk = net->ipv6.tcp_sk;
	unsigned int tot_len = sizeof(struct tcphdr);
	__be32 mrst = 0, *topt;
	struct dst_entry *dst;
	__u32 mark = 0;

	if (tsecr)
		tot_len += TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_TCP_MD5SIG
	if (key)
		tot_len += TCPOLEN_MD5SIG_ALIGNED;
#endif

#ifdef CONFIG_MPTCP
	if (rst && !key) {
		mrst = mptcp_reset_option(skb);

		if (mrst)
			tot_len += sizeof(__be32);
	}
#endif

	buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
	if (!buff)
		return;

	skb_reserve(buff, MAX_TCP_HEADER);

	t1 = skb_push(buff, tot_len);
	skb_reset_transport_header(buff);

	/* Swap the send and the receive.
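	 * (addresses and ports of the incoming segment are reversed for the
	 * reply we are about to build)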
	 */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = tot_len / 4;
	t1->seq = htonl(seq);
	t1->ack_seq = htonl(ack);
	t1->ack = !rst || !th->ack;
	t1->rst = rst;
	t1->window = htons(win);

	topt = (__be32 *)(t1 + 1);

	if (tsecr) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
		*topt++ = htonl(tsval);
		*topt++ = htonl(tsecr);
	}

	if (mrst)
		*topt++ = mrst;

#ifdef CONFIG_TCP_MD5SIG
	if (key) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
		tcp_v6_md5_hash_hdr((__u8 *)topt, key,
				    &ipv6_hdr(skb)->saddr,
				    &ipv6_hdr(skb)->daddr, t1);
	}
#endif

	memset(&fl6, 0, sizeof(fl6));
	fl6.daddr = ipv6_hdr(skb)->saddr;
	fl6.saddr = ipv6_hdr(skb)->daddr;
	fl6.flowlabel = label;

	buff->ip_summed = CHECKSUM_PARTIAL;

	__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);

	fl6.flowi6_proto = IPPROTO_TCP;
	if (rt6_need_strict(&fl6.daddr) && !oif)
		fl6.flowi6_oif = tcp_v6_iif(skb);
	else {
		if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
			oif = skb->skb_iif;

		fl6.flowi6_oif = oif;
	}

	if (sk) {
		if (sk->sk_state == TCP_TIME_WAIT)
			mark = inet_twsk(sk)->tw_mark;
		else
			mark = sk->sk_mark;
		skb_set_delivery_time(buff, tcp_transmit_time(sk), true);
	}
	if (txhash) {
		/* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */
		skb_set_hash(buff, txhash, PKT_HASH_TYPE_L4);
	}
	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
	fl6.fl6_dport = t1->dest;
	fl6.fl6_sport = t1->source;
	fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));

	/* Pass a socket to ip6_dst_lookup_flow() even when it is for a RST;
	 * the underlying function will use it to retrieve the network
	 * namespace.
	 */
	if (sk && sk->sk_state != TCP_TIME_WAIT)
		dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /* sk's xfrm_policy can be referred to */
	else
		dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL);
	if (!IS_ERR(dst)) {
		skb_dst_set(buff, dst);
		ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
			 tclass & ~INET_ECN_MASK, priority);
		TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
		if (rst)
			TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
		return;
	}

	kfree_skb(buff);
}

static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
	u32 seq = 0, ack_seq = 0;
	struct tcp_md5sig_key *key = NULL;
#ifdef CONFIG_TCP_MD5SIG
	const __u8 *hash_location = NULL;
	unsigned char newhash[16];
	int genhash;
	struct sock *sk1 = NULL;
#endif
	__be32 label = 0;
	u32 priority = 0;
	struct net *net;
	int oif = 0;

	if (th->rst)
		return;

	/* If sk is not NULL, it means we did a successful lookup and the
	 * incoming route had to be correct. prequeue might have dropped our dst.
	 */
	if (!sk && !ipv6_unicast_destination(skb))
		return;

	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
#ifdef CONFIG_TCP_MD5SIG
	rcu_read_lock();
	hash_location = tcp_parse_md5sig_option(th);
	if (sk && sk_fullsock(sk)) {
		int l3index;

		/* sdif set means the packet ingressed via a device
		 * in an L3 domain and inet_iif is set to it.
		 */
		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
		key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
	} else if (hash_location) {
		int dif = tcp_v6_iif_l3_slave(skb);
		int sdif = tcp_v6_sdif(skb);
		int l3index;

		/*
		 * The active side is gone. Try to find the listening socket
		 * via the source port, then look up the MD5 key through that
		 * listening socket.
		 * We do not loosen security here: the incoming packet is
		 * verified against the key we find, and no RST is generated
		 * if the MD5 hash doesn't match.
		 */
		sk1 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
					    NULL, 0, &ipv6h->saddr, th->source,
					    &ipv6h->daddr, ntohs(th->source),
					    dif, sdif);
		if (!sk1)
			goto out;

		/* sdif set means the packet ingressed via a device
		 * in an L3 domain and dif is set to it.
		 */
		l3index = tcp_v6_sdif(skb) ? dif : 0;

		key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
		if (!key)
			goto out;

		genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
		if (genhash || memcmp(hash_location, newhash, 16) != 0)
			goto out;
	}
#endif

	if (th->ack)
		seq = ntohl(th->ack_seq);
	else
		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
			  (th->doff << 2);

	if (sk) {
		oif = sk->sk_bound_dev_if;
		if (sk_fullsock(sk)) {
			const struct ipv6_pinfo *np = tcp_inet6_sk(sk);

			trace_tcp_send_reset(sk, skb);
			if (np->repflow)
				label = ip6_flowlabel(ipv6h);
			priority = sk->sk_priority;
		}
		if (sk->sk_state == TCP_TIME_WAIT) {
			label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
			priority = inet_twsk(sk)->tw_priority;
		}
	} else {
		if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
			label = ip6_flowlabel(ipv6h);
	}

	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
			     ipv6_get_dsfield(ipv6h), label, priority, 0);

#ifdef CONFIG_TCP_MD5SIG
out:
	rcu_read_unlock();
#endif
}

static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
			    u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
			    struct tcp_md5sig_key *key, u8 tclass,
			    __be32 label, u32 priority, u32 txhash)
{
	tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
			     tclass, label, priority, txhash);
}

static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcp_time_stamp_raw() + tcptw->tw_ts_offset,
			tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
			tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority,
			tw->tw_txhash);

	inet_twsk_put(tw);
}

static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
{
	int l3index;

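	/* Look up the MD5 key in the ingress L3 domain (VRF), if any, when
	 * building the ACK below.
	 */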
	l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;

	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
	 */
	/* RFC 7323 2.3
	 * The window field (SEG.WND) of every outgoing segment, with the
	 * exception of <SYN> segments, MUST be right-shifted by
	 * Rcv.Wind.Shift bits:
	 */
	tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
			tcp_rsk(req)->rcv_nxt,
			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
			tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
			req->ts_recent, sk->sk_bound_dev_if,
			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
			ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority,
			tcp_rsk(req)->txhash);
}


static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
	const struct tcphdr *th = tcp_hdr(skb);

	if (!th->syn)
		sk = cookie_v6_check(sk, skb);
#endif
	return sk;
}

u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
			 struct tcphdr *th, u32 *cookie)
{
	u16 mss = 0;
#ifdef CONFIG_SYN_COOKIES
	mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
				    &tcp_request_sock_ipv6_ops, sk, th);
	if (mss) {
		*cookie = __cookie_v6_init_sequence(iph, th, &mss);
		tcp_synq_overflow(sk);
	}
#endif
	return mss;
}

static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))
		goto drop;

	if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
		__IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
		return 0;
	}

	return tcp_conn_request(&tcp6_request_sock_ops,
				&tcp_request_sock_ipv6_ops, sk, skb);

drop:
	tcp_listendrop(sk);
	return 0; /* don't send reset */
}

static void tcp_v6_restore_cb(struct sk_buff *skb)
{
	/* We need to move the header back to the beginning if xfrm6_policy_check()
	 * and tcp_v6_fill_cb() are going to be called again.
	 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
	 */
	memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
		sizeof(struct inet6_skb_parm));
}

static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
					 struct request_sock *req,
					 struct dst_entry *dst,
					 struct request_sock *req_unhash,
					 bool *own_req)
{
	struct inet_request_sock *ireq;
	struct ipv6_pinfo *newnp;
	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct ipv6_txoptions *opt;
	struct inet_sock *newinet;
	bool found_dup_sk = false;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
	int l3index;
#endif
	struct flowi6 fl6;

	if (skb->protocol == htons(ETH_P_IP)) {
		/*
		 *	v6 mapped
		 */

		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
					     req_unhash, own_req);

		if (!newsk)
			return NULL;

		inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

		newnp = tcp_inet6_sk(newsk);
		newtp = tcp_sk(newsk);

		memcpy(newnp, np, sizeof(struct ipv6_pinfo));

		newnp->saddr = newsk->sk_v6_rcv_saddr;

		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
		if (sk_is_mptcp(newsk))
			mptcpv6_handle_mapped(newsk, true);
		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
		newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		newnp->ipv6_mc_list = NULL;
		newnp->ipv6_ac_list = NULL;
		newnp->ipv6_fl_list = NULL;
		newnp->pktoptions = NULL;
		newnp->opt = NULL;
		newnp->mcast_oif = inet_iif(skb);
		newnp->mcast_hops = ip_hdr(skb)->ttl;
		newnp->rcv_flowinfo = 0;
		if (np->repflow)
			newnp->flow_label = 0;

		/*
		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
		 * here, tcp_create_openreq_child now does this for us, see the comment in
		 * that function for the gory details. -acme
		 */

		/* It is a tricky place. Until this moment IPv4 tcp
		   worked with the IPv6 icsk.icsk_af_ops.
		   Sync it now.
		 */
		tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);

		return newsk;
	}

	ireq = inet_rsk(req);

	if (sk_acceptq_is_full(sk))
		goto out_overflow;

	if (!dst) {
		dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
		if (!dst)
			goto out;
	}

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto out_nonewsk;

	/*
	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
	 * count here, tcp_create_openreq_child now does this for us, see the
	 * comment in that function for the gory details. -acme
	 */

	newsk->sk_gso_type = SKB_GSO_TCPV6;
	ip6_dst_store(newsk, dst, NULL, NULL);
	inet6_sk_rx_dst_set(newsk, skb);

	inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	newnp = tcp_inet6_sk(newsk);

	memcpy(newnp, np, sizeof(struct ipv6_pinfo));

	newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
	newnp->saddr = ireq->ir_v6_loc_addr;
	newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
	newsk->sk_bound_dev_if = ireq->ir_iif;

	/* Now IPv6 options...

	   First: no IPv4 options.
	 */
	newinet->inet_opt = NULL;
	newnp->ipv6_mc_list = NULL;
	newnp->ipv6_ac_list = NULL;
	newnp->ipv6_fl_list = NULL;

	/* Clone RX bits */
	newnp->rxopt.all = np->rxopt.all;

	newnp->pktoptions = NULL;
	newnp->opt = NULL;
	newnp->mcast_oif = tcp_v6_iif(skb);
	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
	newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
	if (np->repflow)
		newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));

	/* Set ToS of the new socket based upon the value of incoming SYN.
	 * ECT bits are set later in tcp_init_transfer().
	 */
	if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
		newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;

	/* Clone native IPv6 options from listening socket (if any)

	   Yes, keeping reference count would be much more clever,
	   but we do one more thing here: reattach optmem
	   to newsk.
	 */
	opt = ireq->ipv6_opt;
	if (!opt)
		opt = rcu_dereference(np->opt);
	if (opt) {
		opt = ipv6_dup_options(newsk, opt);
		RCU_INIT_POINTER(newnp->opt, opt);
	}
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (opt)
		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
						    opt->opt_flen;

	tcp_ca_openreq_child(newsk, dst);

	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));

	tcp_initialize_rcv_mss(newsk);

	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;

#ifdef CONFIG_TCP_MD5SIG
	l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);

	/* Copy over the MD5 key from the original socket */
	key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
	if (key) {
		/* We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
			       AF_INET6, 128, l3index, key->flags, key->key, key->keylen,
			       sk_gfp_mask(sk, GFP_ATOMIC));
	}
#endif

	if (__inet_inherit_port(sk, newsk) < 0) {
		inet_csk_prepare_forced_close(newsk);
		tcp_done(newsk);
		goto out;
	}
	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
				       &found_dup_sk);
	if (*own_req) {
		tcp_move_syn(newtp, req);

		/* Clone pktoptions received with SYN, if we own the req */
		if (ireq->pktopts) {
			newnp->pktoptions = skb_clone(ireq->pktopts,
						      sk_gfp_mask(sk, GFP_ATOMIC));
			consume_skb(ireq->pktopts);
			ireq->pktopts = NULL;
			if (newnp->pktoptions) {
				tcp_v6_restore_cb(newnp->pktoptions);
				skb_set_owner_r(newnp->pktoptions, newsk);
			}
		}
	} else {
		if (!req_unhash && found_dup_sk) {
			/* This code path should only be executed in the
			 * syncookie case
			 */
			bh_unlock_sock(newsk);
			sock_put(newsk);
			newsk = NULL;
		}
	}

	return newsk;

out_overflow:
	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
out_nonewsk:
	dst_release(dst);
out:
	tcp_listendrop(sk);
	return NULL;
}

INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
							    u32));
/* The socket must have its spinlock held when we get
 * here, unless it is a TCP_LISTEN socket.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
INDIRECT_CALLABLE_SCOPE
int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct sk_buff *opt_skb = NULL;
	enum skb_drop_reason reason;
	struct tcp_sock *tp;

	/* Imagine: socket is IPv6. IPv4 packet arrives,
	   goes to IPv4 receive handler and backlogged.
	   From backlog it always goes here. Kerboom...
	   Fortunately, tcp_rcv_established and rcv_established
	   handle them correctly, but it is not the case with
	   tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
	 */

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_do_rcv(sk, skb);

	/*
	 *	socket locking is here for SMP purposes as backlog rcv
	 *	is currently called with bh processing disabled.
	 */

	/* Do Stevens' IPV6_PKTOPTIONS.

	   Yes, guys, it is the only place in our code, where we
	   may make it not affecting IPv4.
	   The rest of the code is protocol independent,
	   and I do not like the idea to uglify IPv4.

	   Actually, all the idea behind IPV6_PKTOPTIONS
	   looks not very well thought out. For now we latch
	   the options received in the last packet enqueued
	   by tcp. Feel free to propose better solution.
					       --ANK (980728)
	 */
	if (np->rxopt.all)
		opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));

	reason = SKB_DROP_REASON_NOT_SPECIFIED;
	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		struct dst_entry *dst;

		dst = rcu_dereference_protected(sk->sk_rx_dst,
						lockdep_sock_is_held(sk));

		sock_rps_save_rxhash(sk, skb);
		sk_mark_napi_id(sk, skb);
		if (dst) {
			if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
			    INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
					    dst, sk->sk_rx_dst_cookie) == NULL) {
				RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
				dst_release(dst);
			}
		}

		tcp_rcv_established(sk, skb);
		if (opt_skb)
			goto ipv6_pktoptions;
		return 0;
	}

	if (tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v6_cookie_check(sk, skb);

		if (!nsk)
			goto discard;

		if (nsk != sk) {
			if (tcp_child_process(sk, nsk, skb))
				goto reset;
			if (opt_skb)
				__kfree_skb(opt_skb);
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb);

	if (tcp_rcv_state_process(sk, skb))
		goto reset;
	if (opt_skb)
		goto ipv6_pktoptions;
	return 0;

reset:
	tcp_v6_send_reset(sk, skb);
discard:
	if (opt_skb)
		__kfree_skb(opt_skb);
	kfree_skb_reason(skb, reason);
	return 0;
csum_err:
	reason = SKB_DROP_REASON_TCP_CSUM;
	trace_tcp_bad_csum(skb);
	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
	goto discard;


ipv6_pktoptions:
	/* Do you ask, what is it?

	   1. skb was enqueued by tcp.
	   2. skb is added to tail of read queue, rather than out of order.
	   3. socket is not in passive state.
	   4. Finally, it really contains options, which the user wants to receive.
	 */
	tp = tcp_sk(sk);
	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
			np->mcast_oif = tcp_v6_iif(opt_skb);
		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
		if (np->repflow)
			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
		if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
			skb_set_owner_r(opt_skb, sk);
			tcp_v6_restore_cb(opt_skb);
			opt_skb = xchg(&np->pktoptions, opt_skb);
		} else {
			__kfree_skb(opt_skb);
			opt_skb = xchg(&np->pktoptions, NULL);
		}
	}

	consume_skb(opt_skb);
	return 0;
}

static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
			   const struct tcphdr *th)
{
	/* This is tricky: we move IP6CB to its correct location into
	 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
	 * _decode_session6() uses IP6CB().
	 * barrier() makes sure compiler won't play aliasing games.
	 */
	memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
		sizeof(struct inet6_skb_parm));
	barrier();

	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff*4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
	TCP_SKB_CB(skb)->sacked = 0;
	TCP_SKB_CB(skb)->has_rxtstamp =
			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
}

INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
{
	enum skb_drop_reason drop_reason;
	int sdif = inet6_sdif(skb);
	int dif = inet6_iif(skb);
	const struct tcphdr *th;
	const struct ipv6hdr *hdr;
	bool refcounted;
	struct sock *sk;
	int ret;
	struct net *net = dev_net(skb->dev);

	drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/*
	 *	Count it even if it's bad.
	 */
	__TCP_INC_STATS(net, TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = (const struct tcphdr *)skb->data;

	if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) {
		drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
		goto bad_packet;
	}
	if (!pskb_may_pull(skb, th->doff*4))
		goto discard_it;

	if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
		goto csum_error;

	th = (const struct tcphdr *)skb->data;
	hdr = ipv6_hdr(skb);

lookup:
	sk = __inet6_lookup_skb(net->ipv4.tcp_death_row.hashinfo, skb, __tcp_hdrlen(th),
				th->source, th->dest, inet6_iif(skb), sdif,
				&refcounted);
	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		struct request_sock *req = inet_reqsk(sk);
		bool req_stolen = false;
		struct sock *nsk;

		sk = req->rsk_listener;
		drop_reason = tcp_inbound_md5_hash(sk, skb,
						   &hdr->saddr, &hdr->daddr,
						   AF_INET6, dif, sdif);
		if (drop_reason) {
			sk_drops_add(sk, skb);
			reqsk_put(req);
			goto discard_it;
		}
		if (tcp_checksum_complete(skb)) {
			reqsk_put(req);
			goto csum_error;
		}
		if (unlikely(sk->sk_state != TCP_LISTEN)) {
			nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
			if (!nsk) {
				inet_csk_reqsk_queue_drop_and_put(sk, req);
				goto lookup;
			}
			sk = nsk;
			/* reuseport_migrate_sock() has already held one sk_refcnt
			 * before returning.
			 */
		} else {
			sock_hold(sk);
		}
		refcounted = true;
		nsk = NULL;
		if (!tcp_filter(sk, skb)) {
			th = (const struct tcphdr *)skb->data;
			hdr = ipv6_hdr(skb);
			tcp_v6_fill_cb(skb, hdr, th);
			nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
		} else {
			drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
		}
		if (!nsk) {
			reqsk_put(req);
			if (req_stolen) {
				/* Another cpu got exclusive access to req
				 * and created a full blown socket.
				 * Try to feed this packet to this socket
				 * instead of discarding it.
				 */
				tcp_v6_restore_cb(skb);
				sock_put(sk);
				goto lookup;
			}
			goto discard_and_relse;
		}
		if (nsk == sk) {
			reqsk_put(req);
			tcp_v6_restore_cb(skb);
		} else if (tcp_child_process(sk, nsk, skb)) {
			tcp_v6_send_reset(nsk, skb);
			goto discard_and_relse;
		} else {
			sock_put(sk);
			return 0;
		}
	}

	if (static_branch_unlikely(&ip6_min_hopcount)) {
		/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
		if (hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
			goto discard_and_relse;
		}
	}

	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
		drop_reason = SKB_DROP_REASON_XFRM_POLICY;
		goto discard_and_relse;
	}

	drop_reason = tcp_inbound_md5_hash(sk, skb, &hdr->saddr, &hdr->daddr,
					   AF_INET6, dif, sdif);
	if (drop_reason)
		goto discard_and_relse;

	if (tcp_filter(sk, skb)) {
		drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
		goto discard_and_relse;
	}
	th = (const struct tcphdr *)skb->data;
	hdr = ipv6_hdr(skb);
	tcp_v6_fill_cb(skb, hdr, th);

	skb->dev = NULL;

	if (sk->sk_state == TCP_LISTEN) {
		ret = tcp_v6_do_rcv(sk, skb);
		goto put_and_return;
	}

	sk_incoming_cpu_update(sk);

	bh_lock_sock_nested(sk);
	tcp_segs_in(tcp_sk(sk), skb);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
		ret = tcp_v6_do_rcv(sk, skb);
	} else {
		if (tcp_add_backlog(sk, skb, &drop_reason))
			goto discard_and_relse;
	}
	bh_unlock_sock(sk);
put_and_return:
	if (refcounted)
		sock_put(sk);
	return ret ? -1 : 0;

no_tcp_socket:
	drop_reason = SKB_DROP_REASON_NO_SOCKET;
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	tcp_v6_fill_cb(skb, hdr, th);

	if (tcp_checksum_complete(skb)) {
csum_error:
		drop_reason = SKB_DROP_REASON_TCP_CSUM;
		trace_tcp_bad_csum(skb);
		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
bad_packet:
		__TCP_INC_STATS(net, TCP_MIB_INERRS);
	} else {
		tcp_v6_send_reset(NULL, skb);
	}

discard_it:
	SKB_DR_OR(drop_reason, NOT_SPECIFIED);
	kfree_skb_reason(skb, drop_reason);
	return 0;

discard_and_relse:
	sk_drops_add(sk, skb);
	if (refcounted)
		sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		drop_reason = SKB_DROP_REASON_XFRM_POLICY;
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	tcp_v6_fill_cb(skb, hdr, th);

	if (tcp_checksum_complete(skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto csum_error;
	}

	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
	case TCP_TW_SYN:
	{
		struct sock *sk2;

		sk2 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
					    skb, __tcp_hdrlen(th),
					    &ipv6_hdr(skb)->saddr, th->source,
					    &ipv6_hdr(skb)->daddr,
					    ntohs(th->dest),
					    tcp_v6_iif_l3_slave(skb),
					    sdif);
		if (sk2) {
			struct inet_timewait_sock *tw = inet_twsk(sk);
			inet_twsk_deschedule_put(tw);
			sk = sk2;
			tcp_v6_restore_cb(skb);
			refcounted = false;
			goto process;
		}
	}
		/* to ACK */
		fallthrough;
	case TCP_TW_ACK:
		tcp_v6_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		tcp_v6_send_reset(sk, skb);
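		/* After replying with a RST there is no point in keeping the
		 * timewait socket around; remove it immediately.
		 */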
		inet_twsk_deschedule_put(inet_twsk(sk));
		goto discard_it;
	case TCP_TW_SUCCESS:
		;
	}
	goto discard_it;
}

void tcp_v6_early_demux(struct sk_buff *skb)
{
	struct net *net = dev_net(skb->dev);
	const struct ipv6hdr *hdr;
	const struct tcphdr *th;
	struct sock *sk;

	if (skb->pkt_type != PACKET_HOST)
		return;

	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
		return;

	hdr = ipv6_hdr(skb);
	th = tcp_hdr(skb);

	if (th->doff < sizeof(struct tcphdr) / 4)
		return;

	/* Note: We use inet6_iif() here, not tcp_v6_iif() */
	sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
					&hdr->saddr, th->source,
					&hdr->daddr, ntohs(th->dest),
					inet6_iif(skb), inet6_sdif(skb));
	if (sk) {
		skb->sk = sk;
		skb->destructor = sock_edemux;
		if (sk_fullsock(sk)) {
			struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);

			if (dst)
				dst = dst_check(dst, sk->sk_rx_dst_cookie);
			if (dst &&
			    sk->sk_rx_dst_ifindex == skb->skb_iif)
				skb_dst_set_noref(skb, dst);
		}
	}
}

static struct timewait_sock_ops tcp6_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
	.twsk_unique	= tcp_twsk_unique,
	.twsk_destructor = tcp_twsk_destructor,
};

INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
{
	__tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr);
}

const struct inet_connection_sock_af_ops ipv6_specific = {
	.queue_xmit	   = inet6_csk_xmit,
	.send_check	   = tcp_v6_send_check,
	.rebuild_header	   = inet6_sk_rebuild_header,
	.sk_rx_dst_set	   = inet6_sk_rx_dst_set,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.net_header_len	   = sizeof(struct ipv6hdr),
	.net_frag_header_len = sizeof(struct frag_hdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in6),
	.mtu_reduced	   = tcp_v6_mtu_reduced,
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
	.md5_lookup	=	tcp_v6_md5_lookup,
	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
	.md5_parse	=	tcp_v6_parse_md5_keys,
};
#endif

/*
 *	TCP over IPv4 via INET6 API
 */
static const struct inet_connection_sock_af_ops ipv6_mapped = {
	.queue_xmit	   = ip_queue_xmit,
	.send_check	   = tcp_v4_send_check,
	.rebuild_header	   = inet_sk_rebuild_header,
	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in6),
	.mtu_reduced	   = tcp_v4_mtu_reduced,
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
	.md5_lookup	=	tcp_v4_md5_lookup,
	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
	.md5_parse	=	tcp_v6_parse_md5_keys,
};
#endif

/* NOTE: A lot of things are set to zero explicitly by the call to
 *	 sk_alloc(), so they need not be done here.
 */
static int tcp_v6_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	tcp_init_sock(sk);

	icsk->icsk_af_ops = &ipv6_specific;

#ifdef CONFIG_TCP_MD5SIG
	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
#endif

	return 0;
}

static void tcp_v6_destroy_sock(struct sock *sk)
{
	tcp_v4_destroy_sock(sk);
	inet6_destroy_sock(sk);
}

#ifdef CONFIG_PROC_FS
/* Proc filesystem TCPv6 sock list dumping. */
static void get_openreq6(struct seq_file *seq,
			 const struct request_sock *req, int i)
{
	long ttd = req->rsk_timer.expires - jiffies;
	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;

	if (ttd < 0)
		ttd = 0;

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3],
		   inet_rsk(req)->ir_num,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3],
		   ntohs(inet_rsk(req)->ir_rmt_port),
		   TCP_SYN_RECV,
		   0, 0, /* could print option size, but that is af dependent. */
		   1,   /* timers active (only the expire timer) */
		   jiffies_to_clock_t(ttd),
		   req->num_timeout,
		   from_kuid_munged(seq_user_ns(seq),
				    sock_i_uid(req->rsk_listener)),
		   0,  /* non standard timer */
		   0, /* open_requests have no inode */
		   0, req);
}

static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int timer_active;
	unsigned long timer_expires;
	const struct inet_sock *inet = inet_sk(sp);
	const struct tcp_sock *tp = tcp_sk(sp);
	const struct inet_connection_sock *icsk = inet_csk(sp);
	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
	int rx_queue;
	int state;

	dest  = &sp->sk_v6_daddr;
	src   = &sp->sk_v6_rcv_saddr;
	destp = ntohs(inet->inet_dport);
	srcp  = ntohs(inet->inet_sport);

	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
		timer_active	= 1;
		timer_expires	= icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= icsk->icsk_timeout;
	} else if (timer_pending(&sp->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sp->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires = jiffies;
	}

	state = inet_sk_state_load(sp);
	if (state == TCP_LISTEN)
		rx_queue = READ_ONCE(sp->sk_ack_backlog);
	else
		/* Because we don't lock the socket,
		 * we might find a transient negative value.
static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int timer_active;
	unsigned long timer_expires;
	const struct inet_sock *inet = inet_sk(sp);
	const struct tcp_sock *tp = tcp_sk(sp);
	const struct inet_connection_sock *icsk = inet_csk(sp);
	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
	int rx_queue;
	int state;

	dest  = &sp->sk_v6_daddr;
	src   = &sp->sk_v6_rcv_saddr;
	destp = ntohs(inet->inet_dport);
	srcp  = ntohs(inet->inet_sport);

	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
		timer_active	= 1;
		timer_expires	= icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= icsk->icsk_timeout;
	} else if (timer_pending(&sp->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sp->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires = jiffies;
	}

	state = inet_sk_state_load(sp);
	if (state == TCP_LISTEN)
		rx_queue = READ_ONCE(sp->sk_ack_backlog);
	else
		/* Because we don't lock the socket,
		 * we might find a transient negative value.
		 */
		rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
				      READ_ONCE(tp->copied_seq), 0);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   state,
		   READ_ONCE(tp->write_seq) - tp->snd_una,
		   rx_queue,
		   timer_active,
		   jiffies_delta_to_clock_t(timer_expires - jiffies),
		   icsk->icsk_retransmits,
		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
		   icsk->icsk_probes_out,
		   sock_i_ino(sp),
		   refcount_read(&sp->sk_refcnt), sp,
		   jiffies_to_clock_t(icsk->icsk_rto),
		   jiffies_to_clock_t(icsk->icsk_ack.ato),
		   (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
		   tcp_snd_cwnd(tp),
		   state == TCP_LISTEN ?
			fastopenq->max_qlen :
			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
		   );
}

static void get_timewait6_sock(struct seq_file *seq,
			       struct inet_timewait_sock *tw, int i)
{
	long delta = tw->tw_timer.expires - jiffies;
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;

	dest = &tw->tw_v6_daddr;
	src  = &tw->tw_v6_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   tw->tw_substate, 0, 0,
		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
		   refcount_read(&tw->tw_refcnt), tw);
}

static int tcp6_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
	struct sock *sk = v;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "  sl  "
			 "local_address                         "
			 "remote_address                        "
			 "st tx_queue rx_queue tr tm->when retrnsmt"
			 "   uid  timeout inode\n");
		goto out;
	}
	st = seq->private;

	if (sk->sk_state == TCP_TIME_WAIT)
		get_timewait6_sock(seq, v, st->num);
	else if (sk->sk_state == TCP_NEW_SYN_RECV)
		get_openreq6(seq, v, st->num);
	else
		get_tcp6_sock(seq, v, st->num);
out:
	return 0;
}

static const struct seq_operations tcp6_seq_ops = {
	.show		= tcp6_seq_show,
	.start		= tcp_seq_start,
	.next		= tcp_seq_next,
	.stop		= tcp_seq_stop,
};

static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.family		= AF_INET6,
};

int __net_init tcp6_proc_init(struct net *net)
{
	if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
			sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
		return -ENOMEM;
	return 0;
}

void tcp6_proc_exit(struct net *net)
{
	remove_proc_entry("tcp6", net->proc_net);
}
#endif
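/* tcpv6_prot below, wired up through tcpv6_protosw in tcpv6_init(), is the
 * proto instance that a socket(AF_INET6, SOCK_STREAM, IPPROTO_TCP) call
 * resolves to, so each method table entry that follows is reachable directly
 * from the corresponding socket syscall.  Illustrative user-space entry
 * point:
 *
 *	int fd = socket(AF_INET6, SOCK_STREAM, IPPROTO_TCP);
 */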
struct proto tcpv6_prot = {
	.name			= "TCPv6",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.pre_connect		= tcp_v6_pre_connect,
	.connect		= tcp_v6_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v6_init_sock,
	.destroy		= tcp_v6_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.bpf_bypass_getsockopt	= tcp_bpf_bypass_getsockopt,
	.keepalive		= tcp_set_keepalive,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.sendpage		= tcp_sendpage,
	.backlog_rcv		= tcp_v6_do_rcv,
	.release_cb		= tcp_release_cb,
	.hash			= inet6_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.put_port		= inet_put_port,
#ifdef CONFIG_BPF_SYSCALL
	.psock_update_sk_prot	= tcp_bpf_update_proto,
#endif
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.leave_memory_pressure	= tcp_leave_memory_pressure,
	.stream_memory_free	= tcp_stream_memory_free,
	.sockets_allocated	= &tcp_sockets_allocated,

	.memory_allocated	= &tcp_memory_allocated,
	.per_cpu_fw_alloc	= &tcp_memory_per_cpu_fw_alloc,

	.memory_pressure	= &tcp_memory_pressure,
	.orphan_count		= &tcp_orphan_count,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp6_sock),
	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
	.twsk_prot		= &tcp6_timewait_sock_ops,
	.rsk_prot		= &tcp6_request_sock_ops,
	.h.hashinfo		= NULL,
	.no_autobind		= true,
	.diag_destroy		= tcp_abort,
};
EXPORT_SYMBOL_GPL(tcpv6_prot);

static const struct inet6_protocol tcpv6_protocol = {
	.handler	=	tcp_v6_rcv,
	.err_handler	=	tcp_v6_err,
	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};

static struct inet_protosw tcpv6_protosw = {
	.type		=	SOCK_STREAM,
	.protocol	=	IPPROTO_TCP,
	.prot		=	&tcpv6_prot,
	.ops		=	&inet6_stream_ops,
	.flags		=	INET_PROTOSW_PERMANENT |
				INET_PROTOSW_ICSK,
};

static int __net_init tcpv6_net_init(struct net *net)
{
	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
				    SOCK_RAW, IPPROTO_TCP, net);
}

static void __net_exit tcpv6_net_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
}

static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
{
	tcp_twsk_purge(net_exit_list, AF_INET6);
}

static struct pernet_operations tcpv6_net_ops = {
	.init	    = tcpv6_net_init,
	.exit	    = tcpv6_net_exit,
	.exit_batch = tcpv6_net_exit_batch,
};

int __init tcpv6_init(void)
{
	int ret;

	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
	if (ret)
		goto out;

	/* register inet6 protocol */
	ret = inet6_register_protosw(&tcpv6_protosw);
	if (ret)
		goto out_tcpv6_protocol;

	ret = register_pernet_subsys(&tcpv6_net_ops);
	if (ret)
		goto out_tcpv6_protosw;

	ret = mptcpv6_init();
	if (ret)
		goto out_tcpv6_pernet_subsys;

out:
	return ret;

out_tcpv6_pernet_subsys:
	unregister_pernet_subsys(&tcpv6_net_ops);
out_tcpv6_protosw:
	inet6_unregister_protosw(&tcpv6_protosw);
out_tcpv6_protocol:
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
	goto out;
}

void tcpv6_exit(void)
{
	unregister_pernet_subsys(&tcpv6_net_ops);
	inet6_unregister_protosw(&tcpv6_protosw);
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
}
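/* Note: tcpv6_exit() tears the registrations above down in the reverse of
 * the order tcpv6_init() set them up, mirroring the error-unwinding labels
 * in tcpv6_init() itself.
 */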