// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	TCP over IPv6
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on:
 *	linux/net/ipv4/tcp.c
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allows both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 */

#include <linux/bottom_half.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
#include <linux/indirect_call_wrapper.h>

#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/inet6_connection_sock.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/snmp.h>
#include <net/dsfield.h>
#include <net/timewait_sock.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>
#include <net/busy_poll.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <crypto/hash.h>
#include <linux/scatterlist.h>

#include <trace/events/tcp.h>

static void	tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
static void	tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				      struct request_sock *req);

INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);

static const struct inet_connection_sock_af_ops ipv6_mapped;
const struct inet_connection_sock_af_ops ipv6_specific;
#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
#else
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
						   const struct in6_addr *addr,
						   int l3index)
{
	return NULL;
}
#endif

/* Helper returning the inet6 address from a given tcp socket.
 * It can be used in TCP stack instead of inet6_sk(sk).
 * This avoids a dereference and allows compiler optimizations.
 * It is a specialized version of inet6_sk_generic().
95 */ 96 static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk) 97 { 98 unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo); 99 100 return (struct ipv6_pinfo *)(((u8 *)sk) + offset); 101 } 102 103 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) 104 { 105 struct dst_entry *dst = skb_dst(skb); 106 107 if (dst && dst_hold_safe(dst)) { 108 const struct rt6_info *rt = (const struct rt6_info *)dst; 109 110 rcu_assign_pointer(sk->sk_rx_dst, dst); 111 sk->sk_rx_dst_ifindex = skb->skb_iif; 112 sk->sk_rx_dst_cookie = rt6_get_cookie(rt); 113 } 114 } 115 116 static u32 tcp_v6_init_seq(const struct sk_buff *skb) 117 { 118 return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32, 119 ipv6_hdr(skb)->saddr.s6_addr32, 120 tcp_hdr(skb)->dest, 121 tcp_hdr(skb)->source); 122 } 123 124 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb) 125 { 126 return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32, 127 ipv6_hdr(skb)->saddr.s6_addr32); 128 } 129 130 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr, 131 int addr_len) 132 { 133 /* This check is replicated from tcp_v6_connect() and intended to 134 * prevent BPF program called below from accessing bytes that are out 135 * of the bound specified by user in addr_len. 136 */ 137 if (addr_len < SIN6_LEN_RFC2133) 138 return -EINVAL; 139 140 sock_owned_by_me(sk); 141 142 return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr); 143 } 144 145 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, 146 int addr_len) 147 { 148 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; 149 struct inet_connection_sock *icsk = inet_csk(sk); 150 struct in6_addr *saddr = NULL, *final_p, final; 151 struct inet_timewait_death_row *tcp_death_row; 152 struct ipv6_pinfo *np = tcp_inet6_sk(sk); 153 struct inet_sock *inet = inet_sk(sk); 154 struct tcp_sock *tp = tcp_sk(sk); 155 struct net *net = sock_net(sk); 156 struct ipv6_txoptions *opt; 157 struct dst_entry *dst; 158 struct flowi6 fl6; 159 int addr_type; 160 int err; 161 162 if (addr_len < SIN6_LEN_RFC2133) 163 return -EINVAL; 164 165 if (usin->sin6_family != AF_INET6) 166 return -EAFNOSUPPORT; 167 168 memset(&fl6, 0, sizeof(fl6)); 169 170 if (np->sndflow) { 171 fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK; 172 IP6_ECN_flow_init(fl6.flowlabel); 173 if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { 174 struct ip6_flowlabel *flowlabel; 175 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); 176 if (IS_ERR(flowlabel)) 177 return -EINVAL; 178 fl6_sock_release(flowlabel); 179 } 180 } 181 182 /* 183 * connect() to INADDR_ANY means loopback (BSD'ism). 184 */ 185 186 if (ipv6_addr_any(&usin->sin6_addr)) { 187 if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) 188 ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK), 189 &usin->sin6_addr); 190 else 191 usin->sin6_addr = in6addr_loopback; 192 } 193 194 addr_type = ipv6_addr_type(&usin->sin6_addr); 195 196 if (addr_type & IPV6_ADDR_MULTICAST) 197 return -ENETUNREACH; 198 199 if (addr_type&IPV6_ADDR_LINKLOCAL) { 200 if (addr_len >= sizeof(struct sockaddr_in6) && 201 usin->sin6_scope_id) { 202 /* If interface is set while binding, indices 203 * must coincide. 
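			 * (sk_dev_equal_l3scope() below rejects a sin6_scope_id
			 * naming a different device than the one the socket is
			 * already bound to)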
204 */ 205 if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id)) 206 return -EINVAL; 207 208 sk->sk_bound_dev_if = usin->sin6_scope_id; 209 } 210 211 /* Connect to link-local address requires an interface */ 212 if (!sk->sk_bound_dev_if) 213 return -EINVAL; 214 } 215 216 if (tp->rx_opt.ts_recent_stamp && 217 !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) { 218 tp->rx_opt.ts_recent = 0; 219 tp->rx_opt.ts_recent_stamp = 0; 220 WRITE_ONCE(tp->write_seq, 0); 221 } 222 223 sk->sk_v6_daddr = usin->sin6_addr; 224 np->flow_label = fl6.flowlabel; 225 226 /* 227 * TCP over IPv4 228 */ 229 230 if (addr_type & IPV6_ADDR_MAPPED) { 231 u32 exthdrlen = icsk->icsk_ext_hdr_len; 232 struct sockaddr_in sin; 233 234 if (ipv6_only_sock(sk)) 235 return -ENETUNREACH; 236 237 sin.sin_family = AF_INET; 238 sin.sin_port = usin->sin6_port; 239 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3]; 240 241 /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */ 242 WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped); 243 if (sk_is_mptcp(sk)) 244 mptcpv6_handle_mapped(sk, true); 245 sk->sk_backlog_rcv = tcp_v4_do_rcv; 246 #ifdef CONFIG_TCP_MD5SIG 247 tp->af_specific = &tcp_sock_ipv6_mapped_specific; 248 #endif 249 250 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin)); 251 252 if (err) { 253 icsk->icsk_ext_hdr_len = exthdrlen; 254 /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */ 255 WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific); 256 if (sk_is_mptcp(sk)) 257 mptcpv6_handle_mapped(sk, false); 258 sk->sk_backlog_rcv = tcp_v6_do_rcv; 259 #ifdef CONFIG_TCP_MD5SIG 260 tp->af_specific = &tcp_sock_ipv6_specific; 261 #endif 262 goto failure; 263 } 264 np->saddr = sk->sk_v6_rcv_saddr; 265 266 return err; 267 } 268 269 if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) 270 saddr = &sk->sk_v6_rcv_saddr; 271 272 fl6.flowi6_proto = IPPROTO_TCP; 273 fl6.daddr = sk->sk_v6_daddr; 274 fl6.saddr = saddr ? 
*saddr : np->saddr; 275 fl6.flowi6_oif = sk->sk_bound_dev_if; 276 fl6.flowi6_mark = sk->sk_mark; 277 fl6.fl6_dport = usin->sin6_port; 278 fl6.fl6_sport = inet->inet_sport; 279 fl6.flowi6_uid = sk->sk_uid; 280 281 opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk)); 282 final_p = fl6_update_dst(&fl6, opt, &final); 283 284 security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6)); 285 286 dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p); 287 if (IS_ERR(dst)) { 288 err = PTR_ERR(dst); 289 goto failure; 290 } 291 292 tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row; 293 294 if (!saddr) { 295 struct inet_bind_hashbucket *prev_addr_hashbucket = NULL; 296 struct in6_addr prev_v6_rcv_saddr; 297 298 if (icsk->icsk_bind2_hash) { 299 prev_addr_hashbucket = inet_bhashfn_portaddr(tcp_death_row->hashinfo, 300 sk, net, inet->inet_num); 301 prev_v6_rcv_saddr = sk->sk_v6_rcv_saddr; 302 } 303 saddr = &fl6.saddr; 304 sk->sk_v6_rcv_saddr = *saddr; 305 306 if (prev_addr_hashbucket) { 307 err = inet_bhash2_update_saddr(prev_addr_hashbucket, sk); 308 if (err) { 309 sk->sk_v6_rcv_saddr = prev_v6_rcv_saddr; 310 goto failure; 311 } 312 } 313 } 314 315 /* set the source address */ 316 np->saddr = *saddr; 317 inet->inet_rcv_saddr = LOOPBACK4_IPV6; 318 319 sk->sk_gso_type = SKB_GSO_TCPV6; 320 ip6_dst_store(sk, dst, NULL, NULL); 321 322 icsk->icsk_ext_hdr_len = 0; 323 if (opt) 324 icsk->icsk_ext_hdr_len = opt->opt_flen + 325 opt->opt_nflen; 326 327 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); 328 329 inet->inet_dport = usin->sin6_port; 330 331 tcp_set_state(sk, TCP_SYN_SENT); 332 err = inet6_hash_connect(tcp_death_row, sk); 333 if (err) 334 goto late_failure; 335 336 sk_set_txhash(sk); 337 338 if (likely(!tp->repair)) { 339 if (!tp->write_seq) 340 WRITE_ONCE(tp->write_seq, 341 secure_tcpv6_seq(np->saddr.s6_addr32, 342 sk->sk_v6_daddr.s6_addr32, 343 inet->inet_sport, 344 inet->inet_dport)); 345 tp->tsoffset = secure_tcpv6_ts_off(net, np->saddr.s6_addr32, 346 sk->sk_v6_daddr.s6_addr32); 347 } 348 349 if (tcp_fastopen_defer_connect(sk, &err)) 350 return err; 351 if (err) 352 goto late_failure; 353 354 err = tcp_connect(sk); 355 if (err) 356 goto late_failure; 357 358 return 0; 359 360 late_failure: 361 tcp_set_state(sk, TCP_CLOSE); 362 failure: 363 inet->inet_dport = 0; 364 sk->sk_route_caps = 0; 365 return err; 366 } 367 368 static void tcp_v6_mtu_reduced(struct sock *sk) 369 { 370 struct dst_entry *dst; 371 u32 mtu; 372 373 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) 374 return; 375 376 mtu = READ_ONCE(tcp_sk(sk)->mtu_info); 377 378 /* Drop requests trying to increase our current mss. 379 * Check done in __ip6_rt_update_pmtu() is too late. 
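	 * (an ICMPv6 Packet Too Big reporting an MTU that would not shrink
	 * the current MSS is simply ignored)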
	 */
	if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
		return;

	dst = inet6_csk_update_pmtu(sk, mtu);
	if (!dst)
		return;

	if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
		tcp_sync_mss(sk, dst_mtu(dst));
		tcp_simple_retransmit(sk);
	}
}

static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		      u8 type, u8 code, int offset, __be32 info)
{
	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
	const struct tcphdr *th = (struct tcphdr *)(skb->data + offset);
	struct net *net = dev_net(skb->dev);
	struct request_sock *fastopen;
	struct ipv6_pinfo *np;
	struct tcp_sock *tp;
	__u32 seq, snd_una;
	struct sock *sk;
	bool fatal;
	int err;

	sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
					&hdr->daddr, th->dest,
					&hdr->saddr, ntohs(th->source),
					skb->dev->ifindex, inet6_sdif(skb));

	if (!sk) {
		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
				  ICMP6_MIB_INERRORS);
		return -ENOENT;
	}

	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return 0;
	}
	seq = ntohl(th->seq);
	fatal = icmpv6_err_convert(type, code, &err);
	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		tcp_req_err(sk, seq, fatal);
		return 0;
	}

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (static_branch_unlikely(&ip6_min_hopcount)) {
		/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
		if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
			goto out;
		}
	}

	tp = tcp_sk(sk);
	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
	fastopen = rcu_dereference(tp->fastopen_rsk);
	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, snd_una, tp->snd_nxt)) {
		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = tcp_inet6_sk(sk);

	if (type == NDISC_REDIRECT) {
		if (!sock_owned_by_user(sk)) {
			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);

			if (dst)
				dst->ops->redirect(dst, sk, skb);
		}
		goto out;
	}

	if (type == ICMPV6_PKT_TOOBIG) {
		u32 mtu = ntohl(info);

		/* We are not interested in TCP_LISTEN and open_requests
		 * (SYN-ACKs sent out by Linux are always < 576 bytes, so
		 * they should go through unfragmented).
		 */
		if (sk->sk_state == TCP_LISTEN)
			goto out;

		if (!ip6_sk_accept_pmtu(sk))
			goto out;

		if (mtu < IPV6_MIN_MTU)
			goto out;

		WRITE_ONCE(tp->mtu_info, mtu);

		if (!sock_owned_by_user(sk))
			tcp_v6_mtu_reduced(sk);
		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
					   &sk->sk_tsq_flags))
			sock_hold(sk);
		goto out;
	}

	/* Might be for a request_sock */
	switch (sk->sk_state) {
	case TCP_SYN_SENT:
	case TCP_SYN_RECV:
		/* Only in fast or simultaneous open. If a fast open socket is
		 * already accepted it is treated as a connected one below.
500 */ 501 if (fastopen && !fastopen->sk) 502 break; 503 504 ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th); 505 506 if (!sock_owned_by_user(sk)) { 507 sk->sk_err = err; 508 sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */ 509 510 tcp_done(sk); 511 } else 512 sk->sk_err_soft = err; 513 goto out; 514 case TCP_LISTEN: 515 break; 516 default: 517 /* check if this ICMP message allows revert of backoff. 518 * (see RFC 6069) 519 */ 520 if (!fastopen && type == ICMPV6_DEST_UNREACH && 521 code == ICMPV6_NOROUTE) 522 tcp_ld_RTO_revert(sk, seq); 523 } 524 525 if (!sock_owned_by_user(sk) && np->recverr) { 526 sk->sk_err = err; 527 sk_error_report(sk); 528 } else 529 sk->sk_err_soft = err; 530 531 out: 532 bh_unlock_sock(sk); 533 sock_put(sk); 534 return 0; 535 } 536 537 538 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, 539 struct flowi *fl, 540 struct request_sock *req, 541 struct tcp_fastopen_cookie *foc, 542 enum tcp_synack_type synack_type, 543 struct sk_buff *syn_skb) 544 { 545 struct inet_request_sock *ireq = inet_rsk(req); 546 struct ipv6_pinfo *np = tcp_inet6_sk(sk); 547 struct ipv6_txoptions *opt; 548 struct flowi6 *fl6 = &fl->u.ip6; 549 struct sk_buff *skb; 550 int err = -ENOMEM; 551 u8 tclass; 552 553 /* First, grab a route. */ 554 if (!dst && (dst = inet6_csk_route_req(sk, fl6, req, 555 IPPROTO_TCP)) == NULL) 556 goto done; 557 558 skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb); 559 560 if (skb) { 561 __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr, 562 &ireq->ir_v6_rmt_addr); 563 564 fl6->daddr = ireq->ir_v6_rmt_addr; 565 if (np->repflow && ireq->pktopts) 566 fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts)); 567 568 tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ? 569 (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) | 570 (np->tclass & INET_ECN_MASK) : 571 np->tclass; 572 573 if (!INET_ECN_is_capable(tclass) && 574 tcp_bpf_ca_needs_ecn((struct sock *)req)) 575 tclass |= INET_ECN_ECT_0; 576 577 rcu_read_lock(); 578 opt = ireq->ipv6_opt; 579 if (!opt) 580 opt = rcu_dereference(np->opt); 581 err = ip6_xmit(sk, skb, fl6, skb->mark ? 
: sk->sk_mark, opt, 582 tclass, sk->sk_priority); 583 rcu_read_unlock(); 584 err = net_xmit_eval(err); 585 } 586 587 done: 588 return err; 589 } 590 591 592 static void tcp_v6_reqsk_destructor(struct request_sock *req) 593 { 594 kfree(inet_rsk(req)->ipv6_opt); 595 consume_skb(inet_rsk(req)->pktopts); 596 } 597 598 #ifdef CONFIG_TCP_MD5SIG 599 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk, 600 const struct in6_addr *addr, 601 int l3index) 602 { 603 return tcp_md5_do_lookup(sk, l3index, 604 (union tcp_md5_addr *)addr, AF_INET6); 605 } 606 607 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk, 608 const struct sock *addr_sk) 609 { 610 int l3index; 611 612 l3index = l3mdev_master_ifindex_by_index(sock_net(sk), 613 addr_sk->sk_bound_dev_if); 614 return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr, 615 l3index); 616 } 617 618 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname, 619 sockptr_t optval, int optlen) 620 { 621 struct tcp_md5sig cmd; 622 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr; 623 int l3index = 0; 624 u8 prefixlen; 625 u8 flags; 626 627 if (optlen < sizeof(cmd)) 628 return -EINVAL; 629 630 if (copy_from_sockptr(&cmd, optval, sizeof(cmd))) 631 return -EFAULT; 632 633 if (sin6->sin6_family != AF_INET6) 634 return -EINVAL; 635 636 flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX; 637 638 if (optname == TCP_MD5SIG_EXT && 639 cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) { 640 prefixlen = cmd.tcpm_prefixlen; 641 if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) && 642 prefixlen > 32)) 643 return -EINVAL; 644 } else { 645 prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128; 646 } 647 648 if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex && 649 cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) { 650 struct net_device *dev; 651 652 rcu_read_lock(); 653 dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex); 654 if (dev && netif_is_l3_master(dev)) 655 l3index = dev->ifindex; 656 rcu_read_unlock(); 657 658 /* ok to reference set/not set outside of rcu; 659 * right now device MUST be an L3 master 660 */ 661 if (!dev || !l3index) 662 return -EINVAL; 663 } 664 665 if (!cmd.tcpm_keylen) { 666 if (ipv6_addr_v4mapped(&sin6->sin6_addr)) 667 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3], 668 AF_INET, prefixlen, 669 l3index, flags); 670 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr, 671 AF_INET6, prefixlen, l3index, flags); 672 } 673 674 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN) 675 return -EINVAL; 676 677 if (ipv6_addr_v4mapped(&sin6->sin6_addr)) 678 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3], 679 AF_INET, prefixlen, l3index, flags, 680 cmd.tcpm_key, cmd.tcpm_keylen, 681 GFP_KERNEL); 682 683 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr, 684 AF_INET6, prefixlen, l3index, flags, 685 cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL); 686 } 687 688 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp, 689 const struct in6_addr *daddr, 690 const struct in6_addr *saddr, 691 const struct tcphdr *th, int nbytes) 692 { 693 struct tcp6_pseudohdr *bp; 694 struct scatterlist sg; 695 struct tcphdr *_th; 696 697 bp = hp->scratch; 698 /* 1. 
TCP pseudo-header (RFC2460) */ 699 bp->saddr = *saddr; 700 bp->daddr = *daddr; 701 bp->protocol = cpu_to_be32(IPPROTO_TCP); 702 bp->len = cpu_to_be32(nbytes); 703 704 _th = (struct tcphdr *)(bp + 1); 705 memcpy(_th, th, sizeof(*th)); 706 _th->check = 0; 707 708 sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th)); 709 ahash_request_set_crypt(hp->md5_req, &sg, NULL, 710 sizeof(*bp) + sizeof(*th)); 711 return crypto_ahash_update(hp->md5_req); 712 } 713 714 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, 715 const struct in6_addr *daddr, struct in6_addr *saddr, 716 const struct tcphdr *th) 717 { 718 struct tcp_md5sig_pool *hp; 719 struct ahash_request *req; 720 721 hp = tcp_get_md5sig_pool(); 722 if (!hp) 723 goto clear_hash_noput; 724 req = hp->md5_req; 725 726 if (crypto_ahash_init(req)) 727 goto clear_hash; 728 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2)) 729 goto clear_hash; 730 if (tcp_md5_hash_key(hp, key)) 731 goto clear_hash; 732 ahash_request_set_crypt(req, NULL, md5_hash, 0); 733 if (crypto_ahash_final(req)) 734 goto clear_hash; 735 736 tcp_put_md5sig_pool(); 737 return 0; 738 739 clear_hash: 740 tcp_put_md5sig_pool(); 741 clear_hash_noput: 742 memset(md5_hash, 0, 16); 743 return 1; 744 } 745 746 static int tcp_v6_md5_hash_skb(char *md5_hash, 747 const struct tcp_md5sig_key *key, 748 const struct sock *sk, 749 const struct sk_buff *skb) 750 { 751 const struct in6_addr *saddr, *daddr; 752 struct tcp_md5sig_pool *hp; 753 struct ahash_request *req; 754 const struct tcphdr *th = tcp_hdr(skb); 755 756 if (sk) { /* valid for establish/request sockets */ 757 saddr = &sk->sk_v6_rcv_saddr; 758 daddr = &sk->sk_v6_daddr; 759 } else { 760 const struct ipv6hdr *ip6h = ipv6_hdr(skb); 761 saddr = &ip6h->saddr; 762 daddr = &ip6h->daddr; 763 } 764 765 hp = tcp_get_md5sig_pool(); 766 if (!hp) 767 goto clear_hash_noput; 768 req = hp->md5_req; 769 770 if (crypto_ahash_init(req)) 771 goto clear_hash; 772 773 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len)) 774 goto clear_hash; 775 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2)) 776 goto clear_hash; 777 if (tcp_md5_hash_key(hp, key)) 778 goto clear_hash; 779 ahash_request_set_crypt(req, NULL, md5_hash, 0); 780 if (crypto_ahash_final(req)) 781 goto clear_hash; 782 783 tcp_put_md5sig_pool(); 784 return 0; 785 786 clear_hash: 787 tcp_put_md5sig_pool(); 788 clear_hash_noput: 789 memset(md5_hash, 0, 16); 790 return 1; 791 } 792 793 #endif 794 795 static void tcp_v6_init_req(struct request_sock *req, 796 const struct sock *sk_listener, 797 struct sk_buff *skb) 798 { 799 bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags); 800 struct inet_request_sock *ireq = inet_rsk(req); 801 const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener); 802 803 ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; 804 ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; 805 806 /* So that link locals have meaning */ 807 if ((!sk_listener->sk_bound_dev_if || l3_slave) && 808 ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) 809 ireq->ir_iif = tcp_v6_iif(skb); 810 811 if (!TCP_SKB_CB(skb)->tcp_tw_isn && 812 (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) || 813 np->rxopt.bits.rxinfo || 814 np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || 815 np->rxopt.bits.rxohlim || np->repflow)) { 816 refcount_inc(&skb->users); 817 ireq->pktopts = skb; 818 } 819 } 820 821 static struct dst_entry *tcp_v6_route_req(const struct sock *sk, 822 struct sk_buff *skb, 823 struct flowi *fl, 824 struct request_sock 
*req) 825 { 826 tcp_v6_init_req(req, sk, skb); 827 828 if (security_inet_conn_request(sk, skb, req)) 829 return NULL; 830 831 return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP); 832 } 833 834 struct request_sock_ops tcp6_request_sock_ops __read_mostly = { 835 .family = AF_INET6, 836 .obj_size = sizeof(struct tcp6_request_sock), 837 .rtx_syn_ack = tcp_rtx_synack, 838 .send_ack = tcp_v6_reqsk_send_ack, 839 .destructor = tcp_v6_reqsk_destructor, 840 .send_reset = tcp_v6_send_reset, 841 .syn_ack_timeout = tcp_syn_ack_timeout, 842 }; 843 844 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { 845 .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - 846 sizeof(struct ipv6hdr), 847 #ifdef CONFIG_TCP_MD5SIG 848 .req_md5_lookup = tcp_v6_md5_lookup, 849 .calc_md5_hash = tcp_v6_md5_hash_skb, 850 #endif 851 #ifdef CONFIG_SYN_COOKIES 852 .cookie_init_seq = cookie_v6_init_sequence, 853 #endif 854 .route_req = tcp_v6_route_req, 855 .init_seq = tcp_v6_init_seq, 856 .init_ts_off = tcp_v6_init_ts_off, 857 .send_synack = tcp_v6_send_synack, 858 }; 859 860 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq, 861 u32 ack, u32 win, u32 tsval, u32 tsecr, 862 int oif, struct tcp_md5sig_key *key, int rst, 863 u8 tclass, __be32 label, u32 priority, u32 txhash) 864 { 865 const struct tcphdr *th = tcp_hdr(skb); 866 struct tcphdr *t1; 867 struct sk_buff *buff; 868 struct flowi6 fl6; 869 struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev); 870 struct sock *ctl_sk = net->ipv6.tcp_sk; 871 unsigned int tot_len = sizeof(struct tcphdr); 872 __be32 mrst = 0, *topt; 873 struct dst_entry *dst; 874 __u32 mark = 0; 875 876 if (tsecr) 877 tot_len += TCPOLEN_TSTAMP_ALIGNED; 878 #ifdef CONFIG_TCP_MD5SIG 879 if (key) 880 tot_len += TCPOLEN_MD5SIG_ALIGNED; 881 #endif 882 883 #ifdef CONFIG_MPTCP 884 if (rst && !key) { 885 mrst = mptcp_reset_option(skb); 886 887 if (mrst) 888 tot_len += sizeof(__be32); 889 } 890 #endif 891 892 buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC); 893 if (!buff) 894 return; 895 896 skb_reserve(buff, MAX_TCP_HEADER); 897 898 t1 = skb_push(buff, tot_len); 899 skb_reset_transport_header(buff); 900 901 /* Swap the send and the receive. 
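	 * The reply is built from the incoming segment: the ports are mirrored
	 * and the seq/ack values come from the caller, so the same helper
	 * serves both RST and ACK replies.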
*/ 902 memset(t1, 0, sizeof(*t1)); 903 t1->dest = th->source; 904 t1->source = th->dest; 905 t1->doff = tot_len / 4; 906 t1->seq = htonl(seq); 907 t1->ack_seq = htonl(ack); 908 t1->ack = !rst || !th->ack; 909 t1->rst = rst; 910 t1->window = htons(win); 911 912 topt = (__be32 *)(t1 + 1); 913 914 if (tsecr) { 915 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | 916 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); 917 *topt++ = htonl(tsval); 918 *topt++ = htonl(tsecr); 919 } 920 921 if (mrst) 922 *topt++ = mrst; 923 924 #ifdef CONFIG_TCP_MD5SIG 925 if (key) { 926 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | 927 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG); 928 tcp_v6_md5_hash_hdr((__u8 *)topt, key, 929 &ipv6_hdr(skb)->saddr, 930 &ipv6_hdr(skb)->daddr, t1); 931 } 932 #endif 933 934 memset(&fl6, 0, sizeof(fl6)); 935 fl6.daddr = ipv6_hdr(skb)->saddr; 936 fl6.saddr = ipv6_hdr(skb)->daddr; 937 fl6.flowlabel = label; 938 939 buff->ip_summed = CHECKSUM_PARTIAL; 940 941 __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr); 942 943 fl6.flowi6_proto = IPPROTO_TCP; 944 if (rt6_need_strict(&fl6.daddr) && !oif) 945 fl6.flowi6_oif = tcp_v6_iif(skb); 946 else { 947 if (!oif && netif_index_is_l3_master(net, skb->skb_iif)) 948 oif = skb->skb_iif; 949 950 fl6.flowi6_oif = oif; 951 } 952 953 if (sk) { 954 if (sk->sk_state == TCP_TIME_WAIT) 955 mark = inet_twsk(sk)->tw_mark; 956 else 957 mark = sk->sk_mark; 958 skb_set_delivery_time(buff, tcp_transmit_time(sk), true); 959 } 960 if (txhash) { 961 /* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */ 962 skb_set_hash(buff, txhash, PKT_HASH_TYPE_L4); 963 } 964 fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark; 965 fl6.fl6_dport = t1->dest; 966 fl6.fl6_sport = t1->source; 967 fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL); 968 security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6)); 969 970 /* Pass a socket to ip6_dst_lookup either it is for RST 971 * Underlying function will use this to retrieve the network 972 * namespace 973 */ 974 if (sk && sk->sk_state != TCP_TIME_WAIT) 975 dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /*sk's xfrm_policy can be referred*/ 976 else 977 dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL); 978 if (!IS_ERR(dst)) { 979 skb_dst_set(buff, dst); 980 ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, 981 tclass & ~INET_ECN_MASK, priority); 982 TCP_INC_STATS(net, TCP_MIB_OUTSEGS); 983 if (rst) 984 TCP_INC_STATS(net, TCP_MIB_OUTRSTS); 985 return; 986 } 987 988 kfree_skb(buff); 989 } 990 991 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) 992 { 993 const struct tcphdr *th = tcp_hdr(skb); 994 struct ipv6hdr *ipv6h = ipv6_hdr(skb); 995 u32 seq = 0, ack_seq = 0; 996 struct tcp_md5sig_key *key = NULL; 997 #ifdef CONFIG_TCP_MD5SIG 998 const __u8 *hash_location = NULL; 999 unsigned char newhash[16]; 1000 int genhash; 1001 struct sock *sk1 = NULL; 1002 #endif 1003 __be32 label = 0; 1004 u32 priority = 0; 1005 struct net *net; 1006 u32 txhash = 0; 1007 int oif = 0; 1008 1009 if (th->rst) 1010 return; 1011 1012 /* If sk not NULL, it means we did a successful lookup and incoming 1013 * route had to be correct. prequeue might have dropped our dst. 1014 */ 1015 if (!sk && !ipv6_unicast_destination(skb)) 1016 return; 1017 1018 net = sk ? 
sock_net(sk) : dev_net(skb_dst(skb)->dev); 1019 #ifdef CONFIG_TCP_MD5SIG 1020 rcu_read_lock(); 1021 hash_location = tcp_parse_md5sig_option(th); 1022 if (sk && sk_fullsock(sk)) { 1023 int l3index; 1024 1025 /* sdif set, means packet ingressed via a device 1026 * in an L3 domain and inet_iif is set to it. 1027 */ 1028 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; 1029 key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index); 1030 } else if (hash_location) { 1031 int dif = tcp_v6_iif_l3_slave(skb); 1032 int sdif = tcp_v6_sdif(skb); 1033 int l3index; 1034 1035 /* 1036 * active side is lost. Try to find listening socket through 1037 * source port, and then find md5 key through listening socket. 1038 * we are not loose security here: 1039 * Incoming packet is checked with md5 hash with finding key, 1040 * no RST generated if md5 hash doesn't match. 1041 */ 1042 sk1 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo, 1043 NULL, 0, &ipv6h->saddr, th->source, 1044 &ipv6h->daddr, ntohs(th->source), 1045 dif, sdif); 1046 if (!sk1) 1047 goto out; 1048 1049 /* sdif set, means packet ingressed via a device 1050 * in an L3 domain and dif is set to it. 1051 */ 1052 l3index = tcp_v6_sdif(skb) ? dif : 0; 1053 1054 key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index); 1055 if (!key) 1056 goto out; 1057 1058 genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb); 1059 if (genhash || memcmp(hash_location, newhash, 16) != 0) 1060 goto out; 1061 } 1062 #endif 1063 1064 if (th->ack) 1065 seq = ntohl(th->ack_seq); 1066 else 1067 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len - 1068 (th->doff << 2); 1069 1070 if (sk) { 1071 oif = sk->sk_bound_dev_if; 1072 if (sk_fullsock(sk)) { 1073 const struct ipv6_pinfo *np = tcp_inet6_sk(sk); 1074 1075 trace_tcp_send_reset(sk, skb); 1076 if (np->repflow) 1077 label = ip6_flowlabel(ipv6h); 1078 priority = sk->sk_priority; 1079 txhash = sk->sk_hash; 1080 } 1081 if (sk->sk_state == TCP_TIME_WAIT) { 1082 label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel); 1083 priority = inet_twsk(sk)->tw_priority; 1084 txhash = inet_twsk(sk)->tw_txhash; 1085 } 1086 } else { 1087 if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET) 1088 label = ip6_flowlabel(ipv6h); 1089 } 1090 1091 tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 1092 ipv6_get_dsfield(ipv6h), label, priority, txhash); 1093 1094 #ifdef CONFIG_TCP_MD5SIG 1095 out: 1096 rcu_read_unlock(); 1097 #endif 1098 } 1099 1100 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq, 1101 u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, 1102 struct tcp_md5sig_key *key, u8 tclass, 1103 __be32 label, u32 priority, u32 txhash) 1104 { 1105 tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0, 1106 tclass, label, priority, txhash); 1107 } 1108 1109 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) 1110 { 1111 struct inet_timewait_sock *tw = inet_twsk(sk); 1112 struct tcp_timewait_sock *tcptw = tcp_twsk(sk); 1113 1114 tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, 1115 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, 1116 tcp_time_stamp_raw() + tcptw->tw_ts_offset, 1117 tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw), 1118 tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority, 1119 tw->tw_txhash); 1120 1121 inet_twsk_put(tw); 1122 } 1123 1124 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, 1125 struct request_sock *req) 1126 { 1127 int l3index; 1128 1129 l3index 
= tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; 1130 1131 /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV 1132 * sk->sk_state == TCP_SYN_RECV -> for Fast Open. 1133 */ 1134 /* RFC 7323 2.3 1135 * The window field (SEG.WND) of every outgoing segment, with the 1136 * exception of <SYN> segments, MUST be right-shifted by 1137 * Rcv.Wind.Shift bits: 1138 */ 1139 tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ? 1140 tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, 1141 tcp_rsk(req)->rcv_nxt, 1142 req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, 1143 tcp_time_stamp_raw() + tcp_rsk(req)->ts_off, 1144 req->ts_recent, sk->sk_bound_dev_if, 1145 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index), 1146 ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority, 1147 tcp_rsk(req)->txhash); 1148 } 1149 1150 1151 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb) 1152 { 1153 #ifdef CONFIG_SYN_COOKIES 1154 const struct tcphdr *th = tcp_hdr(skb); 1155 1156 if (!th->syn) 1157 sk = cookie_v6_check(sk, skb); 1158 #endif 1159 return sk; 1160 } 1161 1162 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph, 1163 struct tcphdr *th, u32 *cookie) 1164 { 1165 u16 mss = 0; 1166 #ifdef CONFIG_SYN_COOKIES 1167 mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops, 1168 &tcp_request_sock_ipv6_ops, sk, th); 1169 if (mss) { 1170 *cookie = __cookie_v6_init_sequence(iph, th, &mss); 1171 tcp_synq_overflow(sk); 1172 } 1173 #endif 1174 return mss; 1175 } 1176 1177 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) 1178 { 1179 if (skb->protocol == htons(ETH_P_IP)) 1180 return tcp_v4_conn_request(sk, skb); 1181 1182 if (!ipv6_unicast_destination(skb)) 1183 goto drop; 1184 1185 if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) { 1186 __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS); 1187 return 0; 1188 } 1189 1190 return tcp_conn_request(&tcp6_request_sock_ops, 1191 &tcp_request_sock_ipv6_ops, sk, skb); 1192 1193 drop: 1194 tcp_listendrop(sk); 1195 return 0; /* don't send reset */ 1196 } 1197 1198 static void tcp_v6_restore_cb(struct sk_buff *skb) 1199 { 1200 /* We need to move header back to the beginning if xfrm6_policy_check() 1201 * and tcp_v6_fill_cb() are going to be called again. 1202 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there. 
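	 * tcp_v6_fill_cb() previously moved IP6CB() into TCP_SKB_CB()->header.h6;
	 * the memmove() below copies it back so IP6CB() users see valid data again.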
1203 */ 1204 memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6, 1205 sizeof(struct inet6_skb_parm)); 1206 } 1207 1208 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, 1209 struct request_sock *req, 1210 struct dst_entry *dst, 1211 struct request_sock *req_unhash, 1212 bool *own_req) 1213 { 1214 struct inet_request_sock *ireq; 1215 struct ipv6_pinfo *newnp; 1216 const struct ipv6_pinfo *np = tcp_inet6_sk(sk); 1217 struct ipv6_txoptions *opt; 1218 struct inet_sock *newinet; 1219 bool found_dup_sk = false; 1220 struct tcp_sock *newtp; 1221 struct sock *newsk; 1222 #ifdef CONFIG_TCP_MD5SIG 1223 struct tcp_md5sig_key *key; 1224 int l3index; 1225 #endif 1226 struct flowi6 fl6; 1227 1228 if (skb->protocol == htons(ETH_P_IP)) { 1229 /* 1230 * v6 mapped 1231 */ 1232 1233 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst, 1234 req_unhash, own_req); 1235 1236 if (!newsk) 1237 return NULL; 1238 1239 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk); 1240 1241 newnp = tcp_inet6_sk(newsk); 1242 newtp = tcp_sk(newsk); 1243 1244 memcpy(newnp, np, sizeof(struct ipv6_pinfo)); 1245 1246 newnp->saddr = newsk->sk_v6_rcv_saddr; 1247 1248 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped; 1249 if (sk_is_mptcp(newsk)) 1250 mptcpv6_handle_mapped(newsk, true); 1251 newsk->sk_backlog_rcv = tcp_v4_do_rcv; 1252 #ifdef CONFIG_TCP_MD5SIG 1253 newtp->af_specific = &tcp_sock_ipv6_mapped_specific; 1254 #endif 1255 1256 newnp->ipv6_mc_list = NULL; 1257 newnp->ipv6_ac_list = NULL; 1258 newnp->ipv6_fl_list = NULL; 1259 newnp->pktoptions = NULL; 1260 newnp->opt = NULL; 1261 newnp->mcast_oif = inet_iif(skb); 1262 newnp->mcast_hops = ip_hdr(skb)->ttl; 1263 newnp->rcv_flowinfo = 0; 1264 if (np->repflow) 1265 newnp->flow_label = 0; 1266 1267 /* 1268 * No need to charge this sock to the relevant IPv6 refcnt debug socks count 1269 * here, tcp_create_openreq_child now does this for us, see the comment in 1270 * that function for the gory details. -acme 1271 */ 1272 1273 /* It is tricky place. Until this moment IPv4 tcp 1274 worked with IPv6 icsk.icsk_af_ops. 1275 Sync it now. 1276 */ 1277 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie); 1278 1279 return newsk; 1280 } 1281 1282 ireq = inet_rsk(req); 1283 1284 if (sk_acceptq_is_full(sk)) 1285 goto out_overflow; 1286 1287 if (!dst) { 1288 dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP); 1289 if (!dst) 1290 goto out; 1291 } 1292 1293 newsk = tcp_create_openreq_child(sk, req, skb); 1294 if (!newsk) 1295 goto out_nonewsk; 1296 1297 /* 1298 * No need to charge this sock to the relevant IPv6 refcnt debug socks 1299 * count here, tcp_create_openreq_child now does this for us, see the 1300 * comment in that function for the gory details. -acme 1301 */ 1302 1303 newsk->sk_gso_type = SKB_GSO_TCPV6; 1304 ip6_dst_store(newsk, dst, NULL, NULL); 1305 inet6_sk_rx_dst_set(newsk, skb); 1306 1307 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk); 1308 1309 newtp = tcp_sk(newsk); 1310 newinet = inet_sk(newsk); 1311 newnp = tcp_inet6_sk(newsk); 1312 1313 memcpy(newnp, np, sizeof(struct ipv6_pinfo)); 1314 1315 newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr; 1316 newnp->saddr = ireq->ir_v6_loc_addr; 1317 newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr; 1318 newsk->sk_bound_dev_if = ireq->ir_iif; 1319 1320 /* Now IPv6 options... 1321 1322 First: no IPv4 options. 
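	   (the child was created from an IPv6 SYN, so inet_opt is simply
	   cleared before the IPv6 options are cloned below)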
	 */
	newinet->inet_opt = NULL;
	newnp->ipv6_mc_list = NULL;
	newnp->ipv6_ac_list = NULL;
	newnp->ipv6_fl_list = NULL;

	/* Clone RX bits */
	newnp->rxopt.all = np->rxopt.all;

	newnp->pktoptions = NULL;
	newnp->opt	  = NULL;
	newnp->mcast_oif  = tcp_v6_iif(skb);
	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
	newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
	if (np->repflow)
		newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));

	/* Set ToS of the new socket based upon the value of the incoming SYN.
	 * ECT bits are set later in tcp_init_transfer().
	 */
	if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
		newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;

	/* Clone native IPv6 options from listening socket (if any)

	   Yes, keeping a reference count would be much more clever, but we do
	   one more thing here: reattach optmem to newsk.
	 */
	opt = ireq->ipv6_opt;
	if (!opt)
		opt = rcu_dereference(np->opt);
	if (opt) {
		opt = ipv6_dup_options(newsk, opt);
		RCU_INIT_POINTER(newnp->opt, opt);
	}
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (opt)
		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
						    opt->opt_flen;

	tcp_ca_openreq_child(newsk, dst);

	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));

	tcp_initialize_rcv_mss(newsk);

	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;

#ifdef CONFIG_TCP_MD5SIG
	l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);

	/* Copy over the MD5 key from the original socket */
	key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
	if (key) {
		/* We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
			       AF_INET6, 128, l3index, key->flags, key->key, key->keylen,
			       sk_gfp_mask(sk, GFP_ATOMIC));
	}
#endif

	if (__inet_inherit_port(sk, newsk) < 0) {
		inet_csk_prepare_forced_close(newsk);
		tcp_done(newsk);
		goto out;
	}
	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
				       &found_dup_sk);
	if (*own_req) {
		tcp_move_syn(newtp, req);

		/* Clone pktoptions received with SYN, if we own the req */
		if (ireq->pktopts) {
			newnp->pktoptions = skb_clone(ireq->pktopts,
						      sk_gfp_mask(sk, GFP_ATOMIC));
			consume_skb(ireq->pktopts);
			ireq->pktopts = NULL;
			if (newnp->pktoptions) {
				tcp_v6_restore_cb(newnp->pktoptions);
				skb_set_owner_r(newnp->pktoptions, newsk);
			}
		}
	} else {
		if (!req_unhash && found_dup_sk) {
			/* This code path should only be executed in the
			 * syncookie case
			 */
			bh_unlock_sock(newsk);
			sock_put(newsk);
			newsk = NULL;
		}
	}

	return newsk;

out_overflow:
	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
out_nonewsk:
	dst_release(dst);
out:
	tcp_listendrop(sk);
	return NULL;
}

INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
							   u32));
/* The socket must have its spinlock held when we get
 * here, unless it is a TCP_LISTEN socket.
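 * (tcp_v6_rcv() takes bh_lock_sock_nested() before calling this for
 * non-listening sockets)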
1438 * 1439 * We have a potential double-lock case here, so even when 1440 * doing backlog processing we use the BH locking scheme. 1441 * This is because we cannot sleep with the original spinlock 1442 * held. 1443 */ 1444 INDIRECT_CALLABLE_SCOPE 1445 int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) 1446 { 1447 struct ipv6_pinfo *np = tcp_inet6_sk(sk); 1448 struct sk_buff *opt_skb = NULL; 1449 enum skb_drop_reason reason; 1450 struct tcp_sock *tp; 1451 1452 /* Imagine: socket is IPv6. IPv4 packet arrives, 1453 goes to IPv4 receive handler and backlogged. 1454 From backlog it always goes here. Kerboom... 1455 Fortunately, tcp_rcv_established and rcv_established 1456 handle them correctly, but it is not case with 1457 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK 1458 */ 1459 1460 if (skb->protocol == htons(ETH_P_IP)) 1461 return tcp_v4_do_rcv(sk, skb); 1462 1463 /* 1464 * socket locking is here for SMP purposes as backlog rcv 1465 * is currently called with bh processing disabled. 1466 */ 1467 1468 /* Do Stevens' IPV6_PKTOPTIONS. 1469 1470 Yes, guys, it is the only place in our code, where we 1471 may make it not affecting IPv4. 1472 The rest of code is protocol independent, 1473 and I do not like idea to uglify IPv4. 1474 1475 Actually, all the idea behind IPV6_PKTOPTIONS 1476 looks not very well thought. For now we latch 1477 options, received in the last packet, enqueued 1478 by tcp. Feel free to propose better solution. 1479 --ANK (980728) 1480 */ 1481 if (np->rxopt.all) 1482 opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC)); 1483 1484 reason = SKB_DROP_REASON_NOT_SPECIFIED; 1485 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ 1486 struct dst_entry *dst; 1487 1488 dst = rcu_dereference_protected(sk->sk_rx_dst, 1489 lockdep_sock_is_held(sk)); 1490 1491 sock_rps_save_rxhash(sk, skb); 1492 sk_mark_napi_id(sk, skb); 1493 if (dst) { 1494 if (sk->sk_rx_dst_ifindex != skb->skb_iif || 1495 INDIRECT_CALL_1(dst->ops->check, ip6_dst_check, 1496 dst, sk->sk_rx_dst_cookie) == NULL) { 1497 RCU_INIT_POINTER(sk->sk_rx_dst, NULL); 1498 dst_release(dst); 1499 } 1500 } 1501 1502 tcp_rcv_established(sk, skb); 1503 if (opt_skb) 1504 goto ipv6_pktoptions; 1505 return 0; 1506 } 1507 1508 if (tcp_checksum_complete(skb)) 1509 goto csum_err; 1510 1511 if (sk->sk_state == TCP_LISTEN) { 1512 struct sock *nsk = tcp_v6_cookie_check(sk, skb); 1513 1514 if (!nsk) 1515 goto discard; 1516 1517 if (nsk != sk) { 1518 if (tcp_child_process(sk, nsk, skb)) 1519 goto reset; 1520 if (opt_skb) 1521 __kfree_skb(opt_skb); 1522 return 0; 1523 } 1524 } else 1525 sock_rps_save_rxhash(sk, skb); 1526 1527 if (tcp_rcv_state_process(sk, skb)) 1528 goto reset; 1529 if (opt_skb) 1530 goto ipv6_pktoptions; 1531 return 0; 1532 1533 reset: 1534 tcp_v6_send_reset(sk, skb); 1535 discard: 1536 if (opt_skb) 1537 __kfree_skb(opt_skb); 1538 kfree_skb_reason(skb, reason); 1539 return 0; 1540 csum_err: 1541 reason = SKB_DROP_REASON_TCP_CSUM; 1542 trace_tcp_bad_csum(skb); 1543 TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS); 1544 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); 1545 goto discard; 1546 1547 1548 ipv6_pktoptions: 1549 /* Do you ask, what is it? 1550 1551 1. skb was enqueued by tcp. 1552 2. skb is added to tail of read queue, rather than out of order. 1553 3. socket is not in passive state. 1554 4. Finally, it really contains options, which user wants to receive. 
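	   (the checks below mirror this list: end_seq must match rcv_nxt, the
	   state is neither CLOSE nor LISTEN, and ipv6_opt_accepted() confirms
	   the user actually asked for these options)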
1555 */ 1556 tp = tcp_sk(sk); 1557 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt && 1558 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) { 1559 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo) 1560 np->mcast_oif = tcp_v6_iif(opt_skb); 1561 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) 1562 np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit; 1563 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass) 1564 np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb)); 1565 if (np->repflow) 1566 np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb)); 1567 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) { 1568 skb_set_owner_r(opt_skb, sk); 1569 tcp_v6_restore_cb(opt_skb); 1570 opt_skb = xchg(&np->pktoptions, opt_skb); 1571 } else { 1572 __kfree_skb(opt_skb); 1573 opt_skb = xchg(&np->pktoptions, NULL); 1574 } 1575 } 1576 1577 consume_skb(opt_skb); 1578 return 0; 1579 } 1580 1581 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr, 1582 const struct tcphdr *th) 1583 { 1584 /* This is tricky: we move IP6CB at its correct location into 1585 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because 1586 * _decode_session6() uses IP6CB(). 1587 * barrier() makes sure compiler won't play aliasing games. 1588 */ 1589 memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb), 1590 sizeof(struct inet6_skb_parm)); 1591 barrier(); 1592 1593 TCP_SKB_CB(skb)->seq = ntohl(th->seq); 1594 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + 1595 skb->len - th->doff*4); 1596 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); 1597 TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th); 1598 TCP_SKB_CB(skb)->tcp_tw_isn = 0; 1599 TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr); 1600 TCP_SKB_CB(skb)->sacked = 0; 1601 TCP_SKB_CB(skb)->has_rxtstamp = 1602 skb->tstamp || skb_hwtstamps(skb)->hwtstamp; 1603 } 1604 1605 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) 1606 { 1607 enum skb_drop_reason drop_reason; 1608 int sdif = inet6_sdif(skb); 1609 int dif = inet6_iif(skb); 1610 const struct tcphdr *th; 1611 const struct ipv6hdr *hdr; 1612 bool refcounted; 1613 struct sock *sk; 1614 int ret; 1615 struct net *net = dev_net(skb->dev); 1616 1617 drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; 1618 if (skb->pkt_type != PACKET_HOST) 1619 goto discard_it; 1620 1621 /* 1622 * Count it even if it's bad. 
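	 * (TCP_MIB_INSEGS is bumped before any validation, so malformed
	 * segments are included in the count)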
1623 */ 1624 __TCP_INC_STATS(net, TCP_MIB_INSEGS); 1625 1626 if (!pskb_may_pull(skb, sizeof(struct tcphdr))) 1627 goto discard_it; 1628 1629 th = (const struct tcphdr *)skb->data; 1630 1631 if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) { 1632 drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL; 1633 goto bad_packet; 1634 } 1635 if (!pskb_may_pull(skb, th->doff*4)) 1636 goto discard_it; 1637 1638 if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo)) 1639 goto csum_error; 1640 1641 th = (const struct tcphdr *)skb->data; 1642 hdr = ipv6_hdr(skb); 1643 1644 lookup: 1645 sk = __inet6_lookup_skb(net->ipv4.tcp_death_row.hashinfo, skb, __tcp_hdrlen(th), 1646 th->source, th->dest, inet6_iif(skb), sdif, 1647 &refcounted); 1648 if (!sk) 1649 goto no_tcp_socket; 1650 1651 process: 1652 if (sk->sk_state == TCP_TIME_WAIT) 1653 goto do_time_wait; 1654 1655 if (sk->sk_state == TCP_NEW_SYN_RECV) { 1656 struct request_sock *req = inet_reqsk(sk); 1657 bool req_stolen = false; 1658 struct sock *nsk; 1659 1660 sk = req->rsk_listener; 1661 drop_reason = tcp_inbound_md5_hash(sk, skb, 1662 &hdr->saddr, &hdr->daddr, 1663 AF_INET6, dif, sdif); 1664 if (drop_reason) { 1665 sk_drops_add(sk, skb); 1666 reqsk_put(req); 1667 goto discard_it; 1668 } 1669 if (tcp_checksum_complete(skb)) { 1670 reqsk_put(req); 1671 goto csum_error; 1672 } 1673 if (unlikely(sk->sk_state != TCP_LISTEN)) { 1674 nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb); 1675 if (!nsk) { 1676 inet_csk_reqsk_queue_drop_and_put(sk, req); 1677 goto lookup; 1678 } 1679 sk = nsk; 1680 /* reuseport_migrate_sock() has already held one sk_refcnt 1681 * before returning. 1682 */ 1683 } else { 1684 sock_hold(sk); 1685 } 1686 refcounted = true; 1687 nsk = NULL; 1688 if (!tcp_filter(sk, skb)) { 1689 th = (const struct tcphdr *)skb->data; 1690 hdr = ipv6_hdr(skb); 1691 tcp_v6_fill_cb(skb, hdr, th); 1692 nsk = tcp_check_req(sk, skb, req, false, &req_stolen); 1693 } else { 1694 drop_reason = SKB_DROP_REASON_SOCKET_FILTER; 1695 } 1696 if (!nsk) { 1697 reqsk_put(req); 1698 if (req_stolen) { 1699 /* Another cpu got exclusive access to req 1700 * and created a full blown socket. 1701 * Try to feed this packet to this socket 1702 * instead of discarding it. 
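				 * (restore IP6CB and redo the lookup; the
				 * freshly created child should now be found
				 * in the established hash)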
1703 */ 1704 tcp_v6_restore_cb(skb); 1705 sock_put(sk); 1706 goto lookup; 1707 } 1708 goto discard_and_relse; 1709 } 1710 if (nsk == sk) { 1711 reqsk_put(req); 1712 tcp_v6_restore_cb(skb); 1713 } else if (tcp_child_process(sk, nsk, skb)) { 1714 tcp_v6_send_reset(nsk, skb); 1715 goto discard_and_relse; 1716 } else { 1717 sock_put(sk); 1718 return 0; 1719 } 1720 } 1721 1722 if (static_branch_unlikely(&ip6_min_hopcount)) { 1723 /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */ 1724 if (hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) { 1725 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); 1726 goto discard_and_relse; 1727 } 1728 } 1729 1730 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) { 1731 drop_reason = SKB_DROP_REASON_XFRM_POLICY; 1732 goto discard_and_relse; 1733 } 1734 1735 drop_reason = tcp_inbound_md5_hash(sk, skb, &hdr->saddr, &hdr->daddr, 1736 AF_INET6, dif, sdif); 1737 if (drop_reason) 1738 goto discard_and_relse; 1739 1740 if (tcp_filter(sk, skb)) { 1741 drop_reason = SKB_DROP_REASON_SOCKET_FILTER; 1742 goto discard_and_relse; 1743 } 1744 th = (const struct tcphdr *)skb->data; 1745 hdr = ipv6_hdr(skb); 1746 tcp_v6_fill_cb(skb, hdr, th); 1747 1748 skb->dev = NULL; 1749 1750 if (sk->sk_state == TCP_LISTEN) { 1751 ret = tcp_v6_do_rcv(sk, skb); 1752 goto put_and_return; 1753 } 1754 1755 sk_incoming_cpu_update(sk); 1756 1757 bh_lock_sock_nested(sk); 1758 tcp_segs_in(tcp_sk(sk), skb); 1759 ret = 0; 1760 if (!sock_owned_by_user(sk)) { 1761 ret = tcp_v6_do_rcv(sk, skb); 1762 } else { 1763 if (tcp_add_backlog(sk, skb, &drop_reason)) 1764 goto discard_and_relse; 1765 } 1766 bh_unlock_sock(sk); 1767 put_and_return: 1768 if (refcounted) 1769 sock_put(sk); 1770 return ret ? -1 : 0; 1771 1772 no_tcp_socket: 1773 drop_reason = SKB_DROP_REASON_NO_SOCKET; 1774 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) 1775 goto discard_it; 1776 1777 tcp_v6_fill_cb(skb, hdr, th); 1778 1779 if (tcp_checksum_complete(skb)) { 1780 csum_error: 1781 drop_reason = SKB_DROP_REASON_TCP_CSUM; 1782 trace_tcp_bad_csum(skb); 1783 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS); 1784 bad_packet: 1785 __TCP_INC_STATS(net, TCP_MIB_INERRS); 1786 } else { 1787 tcp_v6_send_reset(NULL, skb); 1788 } 1789 1790 discard_it: 1791 SKB_DR_OR(drop_reason, NOT_SPECIFIED); 1792 kfree_skb_reason(skb, drop_reason); 1793 return 0; 1794 1795 discard_and_relse: 1796 sk_drops_add(sk, skb); 1797 if (refcounted) 1798 sock_put(sk); 1799 goto discard_it; 1800 1801 do_time_wait: 1802 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { 1803 drop_reason = SKB_DROP_REASON_XFRM_POLICY; 1804 inet_twsk_put(inet_twsk(sk)); 1805 goto discard_it; 1806 } 1807 1808 tcp_v6_fill_cb(skb, hdr, th); 1809 1810 if (tcp_checksum_complete(skb)) { 1811 inet_twsk_put(inet_twsk(sk)); 1812 goto csum_error; 1813 } 1814 1815 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { 1816 case TCP_TW_SYN: 1817 { 1818 struct sock *sk2; 1819 1820 sk2 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo, 1821 skb, __tcp_hdrlen(th), 1822 &ipv6_hdr(skb)->saddr, th->source, 1823 &ipv6_hdr(skb)->daddr, 1824 ntohs(th->dest), 1825 tcp_v6_iif_l3_slave(skb), 1826 sdif); 1827 if (sk2) { 1828 struct inet_timewait_sock *tw = inet_twsk(sk); 1829 inet_twsk_deschedule_put(tw); 1830 sk = sk2; 1831 tcp_v6_restore_cb(skb); 1832 refcounted = false; 1833 goto process; 1834 } 1835 } 1836 /* to ACK */ 1837 fallthrough; 1838 case TCP_TW_ACK: 1839 tcp_v6_timewait_ack(sk, skb); 1840 break; 1841 case TCP_TW_RST: 1842 tcp_v6_send_reset(sk, skb); 1843 
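		/* The RST above already answered the peer; drop the
		 * time-wait socket now.
		 */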
inet_twsk_deschedule_put(inet_twsk(sk)); 1844 goto discard_it; 1845 case TCP_TW_SUCCESS: 1846 ; 1847 } 1848 goto discard_it; 1849 } 1850 1851 void tcp_v6_early_demux(struct sk_buff *skb) 1852 { 1853 struct net *net = dev_net(skb->dev); 1854 const struct ipv6hdr *hdr; 1855 const struct tcphdr *th; 1856 struct sock *sk; 1857 1858 if (skb->pkt_type != PACKET_HOST) 1859 return; 1860 1861 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr))) 1862 return; 1863 1864 hdr = ipv6_hdr(skb); 1865 th = tcp_hdr(skb); 1866 1867 if (th->doff < sizeof(struct tcphdr) / 4) 1868 return; 1869 1870 /* Note : We use inet6_iif() here, not tcp_v6_iif() */ 1871 sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, 1872 &hdr->saddr, th->source, 1873 &hdr->daddr, ntohs(th->dest), 1874 inet6_iif(skb), inet6_sdif(skb)); 1875 if (sk) { 1876 skb->sk = sk; 1877 skb->destructor = sock_edemux; 1878 if (sk_fullsock(sk)) { 1879 struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst); 1880 1881 if (dst) 1882 dst = dst_check(dst, sk->sk_rx_dst_cookie); 1883 if (dst && 1884 sk->sk_rx_dst_ifindex == skb->skb_iif) 1885 skb_dst_set_noref(skb, dst); 1886 } 1887 } 1888 } 1889 1890 static struct timewait_sock_ops tcp6_timewait_sock_ops = { 1891 .twsk_obj_size = sizeof(struct tcp6_timewait_sock), 1892 .twsk_unique = tcp_twsk_unique, 1893 .twsk_destructor = tcp_twsk_destructor, 1894 }; 1895 1896 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb) 1897 { 1898 __tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr); 1899 } 1900 1901 const struct inet_connection_sock_af_ops ipv6_specific = { 1902 .queue_xmit = inet6_csk_xmit, 1903 .send_check = tcp_v6_send_check, 1904 .rebuild_header = inet6_sk_rebuild_header, 1905 .sk_rx_dst_set = inet6_sk_rx_dst_set, 1906 .conn_request = tcp_v6_conn_request, 1907 .syn_recv_sock = tcp_v6_syn_recv_sock, 1908 .net_header_len = sizeof(struct ipv6hdr), 1909 .net_frag_header_len = sizeof(struct frag_hdr), 1910 .setsockopt = ipv6_setsockopt, 1911 .getsockopt = ipv6_getsockopt, 1912 .addr2sockaddr = inet6_csk_addr2sockaddr, 1913 .sockaddr_len = sizeof(struct sockaddr_in6), 1914 .mtu_reduced = tcp_v6_mtu_reduced, 1915 }; 1916 1917 #ifdef CONFIG_TCP_MD5SIG 1918 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = { 1919 .md5_lookup = tcp_v6_md5_lookup, 1920 .calc_md5_hash = tcp_v6_md5_hash_skb, 1921 .md5_parse = tcp_v6_parse_md5_keys, 1922 }; 1923 #endif 1924 1925 /* 1926 * TCP over IPv4 via INET6 API 1927 */ 1928 static const struct inet_connection_sock_af_ops ipv6_mapped = { 1929 .queue_xmit = ip_queue_xmit, 1930 .send_check = tcp_v4_send_check, 1931 .rebuild_header = inet_sk_rebuild_header, 1932 .sk_rx_dst_set = inet_sk_rx_dst_set, 1933 .conn_request = tcp_v6_conn_request, 1934 .syn_recv_sock = tcp_v6_syn_recv_sock, 1935 .net_header_len = sizeof(struct iphdr), 1936 .setsockopt = ipv6_setsockopt, 1937 .getsockopt = ipv6_getsockopt, 1938 .addr2sockaddr = inet6_csk_addr2sockaddr, 1939 .sockaddr_len = sizeof(struct sockaddr_in6), 1940 .mtu_reduced = tcp_v4_mtu_reduced, 1941 }; 1942 1943 #ifdef CONFIG_TCP_MD5SIG 1944 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = { 1945 .md5_lookup = tcp_v4_md5_lookup, 1946 .calc_md5_hash = tcp_v4_md5_hash_skb, 1947 .md5_parse = tcp_v6_parse_md5_keys, 1948 }; 1949 #endif 1950 1951 /* NOTE: A lot of things set to zero explicitly by call to 1952 * sk_alloc() so need not be done here. 
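 * (tcp_init_sock() below performs the protocol-generic setup; only the
 * IPv6-specific af_ops hookup is done in this file)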
1953 */ 1954 static int tcp_v6_init_sock(struct sock *sk) 1955 { 1956 struct inet_connection_sock *icsk = inet_csk(sk); 1957 1958 tcp_init_sock(sk); 1959 1960 icsk->icsk_af_ops = &ipv6_specific; 1961 1962 #ifdef CONFIG_TCP_MD5SIG 1963 tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific; 1964 #endif 1965 1966 return 0; 1967 } 1968 1969 static void tcp_v6_destroy_sock(struct sock *sk) 1970 { 1971 tcp_v4_destroy_sock(sk); 1972 inet6_destroy_sock(sk); 1973 } 1974 1975 #ifdef CONFIG_PROC_FS 1976 /* Proc filesystem TCPv6 sock list dumping. */ 1977 static void get_openreq6(struct seq_file *seq, 1978 const struct request_sock *req, int i) 1979 { 1980 long ttd = req->rsk_timer.expires - jiffies; 1981 const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr; 1982 const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr; 1983 1984 if (ttd < 0) 1985 ttd = 0; 1986 1987 seq_printf(seq, 1988 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " 1989 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n", 1990 i, 1991 src->s6_addr32[0], src->s6_addr32[1], 1992 src->s6_addr32[2], src->s6_addr32[3], 1993 inet_rsk(req)->ir_num, 1994 dest->s6_addr32[0], dest->s6_addr32[1], 1995 dest->s6_addr32[2], dest->s6_addr32[3], 1996 ntohs(inet_rsk(req)->ir_rmt_port), 1997 TCP_SYN_RECV, 1998 0, 0, /* could print option size, but that is af dependent. */ 1999 1, /* timers active (only the expire timer) */ 2000 jiffies_to_clock_t(ttd), 2001 req->num_timeout, 2002 from_kuid_munged(seq_user_ns(seq), 2003 sock_i_uid(req->rsk_listener)), 2004 0, /* non standard timer */ 2005 0, /* open_requests have no inode */ 2006 0, req); 2007 } 2008 2009 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) 2010 { 2011 const struct in6_addr *dest, *src; 2012 __u16 destp, srcp; 2013 int timer_active; 2014 unsigned long timer_expires; 2015 const struct inet_sock *inet = inet_sk(sp); 2016 const struct tcp_sock *tp = tcp_sk(sp); 2017 const struct inet_connection_sock *icsk = inet_csk(sp); 2018 const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq; 2019 int rx_queue; 2020 int state; 2021 2022 dest = &sp->sk_v6_daddr; 2023 src = &sp->sk_v6_rcv_saddr; 2024 destp = ntohs(inet->inet_dport); 2025 srcp = ntohs(inet->inet_sport); 2026 2027 if (icsk->icsk_pending == ICSK_TIME_RETRANS || 2028 icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT || 2029 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { 2030 timer_active = 1; 2031 timer_expires = icsk->icsk_timeout; 2032 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { 2033 timer_active = 4; 2034 timer_expires = icsk->icsk_timeout; 2035 } else if (timer_pending(&sp->sk_timer)) { 2036 timer_active = 2; 2037 timer_expires = sp->sk_timer.expires; 2038 } else { 2039 timer_active = 0; 2040 timer_expires = jiffies; 2041 } 2042 2043 state = inet_sk_state_load(sp); 2044 if (state == TCP_LISTEN) 2045 rx_queue = READ_ONCE(sp->sk_ack_backlog); 2046 else 2047 /* Because we don't lock the socket, 2048 * we might find a transient negative value. 
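		 * (rcv_nxt and copied_seq are read with READ_ONCE() and the
		 * difference is clamped to zero below)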
2049 */ 2050 rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) - 2051 READ_ONCE(tp->copied_seq), 0); 2052 2053 seq_printf(seq, 2054 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " 2055 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n", 2056 i, 2057 src->s6_addr32[0], src->s6_addr32[1], 2058 src->s6_addr32[2], src->s6_addr32[3], srcp, 2059 dest->s6_addr32[0], dest->s6_addr32[1], 2060 dest->s6_addr32[2], dest->s6_addr32[3], destp, 2061 state, 2062 READ_ONCE(tp->write_seq) - tp->snd_una, 2063 rx_queue, 2064 timer_active, 2065 jiffies_delta_to_clock_t(timer_expires - jiffies), 2066 icsk->icsk_retransmits, 2067 from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)), 2068 icsk->icsk_probes_out, 2069 sock_i_ino(sp), 2070 refcount_read(&sp->sk_refcnt), sp, 2071 jiffies_to_clock_t(icsk->icsk_rto), 2072 jiffies_to_clock_t(icsk->icsk_ack.ato), 2073 (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp), 2074 tcp_snd_cwnd(tp), 2075 state == TCP_LISTEN ? 2076 fastopenq->max_qlen : 2077 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh) 2078 ); 2079 } 2080 2081 static void get_timewait6_sock(struct seq_file *seq, 2082 struct inet_timewait_sock *tw, int i) 2083 { 2084 long delta = tw->tw_timer.expires - jiffies; 2085 const struct in6_addr *dest, *src; 2086 __u16 destp, srcp; 2087 2088 dest = &tw->tw_v6_daddr; 2089 src = &tw->tw_v6_rcv_saddr; 2090 destp = ntohs(tw->tw_dport); 2091 srcp = ntohs(tw->tw_sport); 2092 2093 seq_printf(seq, 2094 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " 2095 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n", 2096 i, 2097 src->s6_addr32[0], src->s6_addr32[1], 2098 src->s6_addr32[2], src->s6_addr32[3], srcp, 2099 dest->s6_addr32[0], dest->s6_addr32[1], 2100 dest->s6_addr32[2], dest->s6_addr32[3], destp, 2101 tw->tw_substate, 0, 0, 2102 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0, 2103 refcount_read(&tw->tw_refcnt), tw); 2104 } 2105 2106 static int tcp6_seq_show(struct seq_file *seq, void *v) 2107 { 2108 struct tcp_iter_state *st; 2109 struct sock *sk = v; 2110 2111 if (v == SEQ_START_TOKEN) { 2112 seq_puts(seq, 2113 " sl " 2114 "local_address " 2115 "remote_address " 2116 "st tx_queue rx_queue tr tm->when retrnsmt" 2117 " uid timeout inode\n"); 2118 goto out; 2119 } 2120 st = seq->private; 2121 2122 if (sk->sk_state == TCP_TIME_WAIT) 2123 get_timewait6_sock(seq, v, st->num); 2124 else if (sk->sk_state == TCP_NEW_SYN_RECV) 2125 get_openreq6(seq, v, st->num); 2126 else 2127 get_tcp6_sock(seq, v, st->num); 2128 out: 2129 return 0; 2130 } 2131 2132 static const struct seq_operations tcp6_seq_ops = { 2133 .show = tcp6_seq_show, 2134 .start = tcp_seq_start, 2135 .next = tcp_seq_next, 2136 .stop = tcp_seq_stop, 2137 }; 2138 2139 static struct tcp_seq_afinfo tcp6_seq_afinfo = { 2140 .family = AF_INET6, 2141 }; 2142 2143 int __net_init tcp6_proc_init(struct net *net) 2144 { 2145 if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops, 2146 sizeof(struct tcp_iter_state), &tcp6_seq_afinfo)) 2147 return -ENOMEM; 2148 return 0; 2149 } 2150 2151 void tcp6_proc_exit(struct net *net) 2152 { 2153 remove_proc_entry("tcp6", net->proc_net); 2154 } 2155 #endif 2156 2157 struct proto tcpv6_prot = { 2158 .name = "TCPv6", 2159 .owner = THIS_MODULE, 2160 .close = tcp_close, 2161 .pre_connect = tcp_v6_pre_connect, 2162 .connect = tcp_v6_connect, 2163 .disconnect = tcp_disconnect, 2164 .accept = inet_csk_accept, 2165 .ioctl = tcp_ioctl, 2166 .init = tcp_v6_init_sock, 2167 .destroy = tcp_v6_destroy_sock, 2168 .shutdown = tcp_shutdown, 2169 .setsockopt = 
struct proto tcpv6_prot = {
	.name			= "TCPv6",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.pre_connect		= tcp_v6_pre_connect,
	.connect		= tcp_v6_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v6_init_sock,
	.destroy		= tcp_v6_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.bpf_bypass_getsockopt	= tcp_bpf_bypass_getsockopt,
	.keepalive		= tcp_set_keepalive,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.sendpage		= tcp_sendpage,
	.backlog_rcv		= tcp_v6_do_rcv,
	.release_cb		= tcp_release_cb,
	.hash			= inet6_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.put_port		= inet_put_port,
#ifdef CONFIG_BPF_SYSCALL
	.psock_update_sk_prot	= tcp_bpf_update_proto,
#endif
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.leave_memory_pressure	= tcp_leave_memory_pressure,
	.stream_memory_free	= tcp_stream_memory_free,
	.sockets_allocated	= &tcp_sockets_allocated,

	.memory_allocated	= &tcp_memory_allocated,
	.per_cpu_fw_alloc	= &tcp_memory_per_cpu_fw_alloc,

	.memory_pressure	= &tcp_memory_pressure,
	.orphan_count		= &tcp_orphan_count,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp6_sock),
	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
	.twsk_prot		= &tcp6_timewait_sock_ops,
	.rsk_prot		= &tcp6_request_sock_ops,
	.h.hashinfo		= NULL,
	.no_autobind		= true,
	.diag_destroy		= tcp_abort,
};
EXPORT_SYMBOL_GPL(tcpv6_prot);

static const struct inet6_protocol tcpv6_protocol = {
	.handler	=	tcp_v6_rcv,
	.err_handler	=	tcp_v6_err,
	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};

static struct inet_protosw tcpv6_protosw = {
	.type		=	SOCK_STREAM,
	.protocol	=	IPPROTO_TCP,
	.prot		=	&tcpv6_prot,
	.ops		=	&inet6_stream_ops,
	.flags		=	INET_PROTOSW_PERMANENT |
				INET_PROTOSW_ICSK,
};

static int __net_init tcpv6_net_init(struct net *net)
{
	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
				    SOCK_RAW, IPPROTO_TCP, net);
}

static void __net_exit tcpv6_net_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
}

static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
{
	tcp_twsk_purge(net_exit_list, AF_INET6);
}

static struct pernet_operations tcpv6_net_ops = {
	.init	    = tcpv6_net_init,
	.exit	    = tcpv6_net_exit,
	.exit_batch = tcpv6_net_exit_batch,
};

int __init tcpv6_init(void)
{
	int ret;

	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
	if (ret)
		goto out;

	/* register inet6 protocol */
	ret = inet6_register_protosw(&tcpv6_protosw);
	if (ret)
		goto out_tcpv6_protocol;

	ret = register_pernet_subsys(&tcpv6_net_ops);
	if (ret)
		goto out_tcpv6_protosw;

	ret = mptcpv6_init();
	if (ret)
		goto out_tcpv6_pernet_subsys;

out:
	return ret;

out_tcpv6_pernet_subsys:
	unregister_pernet_subsys(&tcpv6_net_ops);
out_tcpv6_protosw:
	inet6_unregister_protosw(&tcpv6_protosw);
out_tcpv6_protocol:
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
	goto out;
}

void tcpv6_exit(void)
{
	unregister_pernet_subsys(&tcpv6_net_ops);
	inet6_unregister_protosw(&tcpv6_protosw);
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
}