// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	TCP over IPv6
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on:
 *	linux/net/ipv4/tcp.c
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 */

#include <linux/bottom_half.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
#include <linux/indirect_call_wrapper.h>

#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/inet6_connection_sock.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/snmp.h>
#include <net/dsfield.h>
#include <net/timewait_sock.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>
#include <net/busy_poll.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <crypto/hash.h>
#include <linux/scatterlist.h>

#include <trace/events/tcp.h>

static void	tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
static void	tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				      struct request_sock *req);

static int	tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);

static const struct inet_connection_sock_af_ops ipv6_mapped;
const struct inet_connection_sock_af_ops ipv6_specific;
#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
#else
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
						   const struct in6_addr *addr,
						   int l3index)
{
	return NULL;
}
#endif

/* Helper returning the inet6 address from a given tcp socket.
 * It can be used in TCP stack instead of inet6_sk(sk).
 * This avoids a dereference and allow compiler optimizations.
 * It is a specialized version of inet6_sk_generic().
95 */ 96 static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk) 97 { 98 unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo); 99 100 return (struct ipv6_pinfo *)(((u8 *)sk) + offset); 101 } 102 103 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) 104 { 105 struct dst_entry *dst = skb_dst(skb); 106 107 if (dst && dst_hold_safe(dst)) { 108 const struct rt6_info *rt = (const struct rt6_info *)dst; 109 110 sk->sk_rx_dst = dst; 111 sk->sk_rx_dst_ifindex = skb->skb_iif; 112 sk->sk_rx_dst_cookie = rt6_get_cookie(rt); 113 } 114 } 115 116 static u32 tcp_v6_init_seq(const struct sk_buff *skb) 117 { 118 return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32, 119 ipv6_hdr(skb)->saddr.s6_addr32, 120 tcp_hdr(skb)->dest, 121 tcp_hdr(skb)->source); 122 } 123 124 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb) 125 { 126 return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32, 127 ipv6_hdr(skb)->saddr.s6_addr32); 128 } 129 130 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr, 131 int addr_len) 132 { 133 /* This check is replicated from tcp_v6_connect() and intended to 134 * prevent BPF program called below from accessing bytes that are out 135 * of the bound specified by user in addr_len. 136 */ 137 if (addr_len < SIN6_LEN_RFC2133) 138 return -EINVAL; 139 140 sock_owned_by_me(sk); 141 142 return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr); 143 } 144 145 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, 146 int addr_len) 147 { 148 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; 149 struct inet_sock *inet = inet_sk(sk); 150 struct inet_connection_sock *icsk = inet_csk(sk); 151 struct ipv6_pinfo *np = tcp_inet6_sk(sk); 152 struct tcp_sock *tp = tcp_sk(sk); 153 struct in6_addr *saddr = NULL, *final_p, final; 154 struct ipv6_txoptions *opt; 155 struct flowi6 fl6; 156 struct dst_entry *dst; 157 int addr_type; 158 int err; 159 struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row; 160 161 if (addr_len < SIN6_LEN_RFC2133) 162 return -EINVAL; 163 164 if (usin->sin6_family != AF_INET6) 165 return -EAFNOSUPPORT; 166 167 memset(&fl6, 0, sizeof(fl6)); 168 169 if (np->sndflow) { 170 fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK; 171 IP6_ECN_flow_init(fl6.flowlabel); 172 if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { 173 struct ip6_flowlabel *flowlabel; 174 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); 175 if (IS_ERR(flowlabel)) 176 return -EINVAL; 177 fl6_sock_release(flowlabel); 178 } 179 } 180 181 /* 182 * connect() to INADDR_ANY means loopback (BSD'ism). 183 */ 184 185 if (ipv6_addr_any(&usin->sin6_addr)) { 186 if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) 187 ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK), 188 &usin->sin6_addr); 189 else 190 usin->sin6_addr = in6addr_loopback; 191 } 192 193 addr_type = ipv6_addr_type(&usin->sin6_addr); 194 195 if (addr_type & IPV6_ADDR_MULTICAST) 196 return -ENETUNREACH; 197 198 if (addr_type&IPV6_ADDR_LINKLOCAL) { 199 if (addr_len >= sizeof(struct sockaddr_in6) && 200 usin->sin6_scope_id) { 201 /* If interface is set while binding, indices 202 * must coincide. 
203 */ 204 if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id)) 205 return -EINVAL; 206 207 sk->sk_bound_dev_if = usin->sin6_scope_id; 208 } 209 210 /* Connect to link-local address requires an interface */ 211 if (!sk->sk_bound_dev_if) 212 return -EINVAL; 213 } 214 215 if (tp->rx_opt.ts_recent_stamp && 216 !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) { 217 tp->rx_opt.ts_recent = 0; 218 tp->rx_opt.ts_recent_stamp = 0; 219 WRITE_ONCE(tp->write_seq, 0); 220 } 221 222 sk->sk_v6_daddr = usin->sin6_addr; 223 np->flow_label = fl6.flowlabel; 224 225 /* 226 * TCP over IPv4 227 */ 228 229 if (addr_type & IPV6_ADDR_MAPPED) { 230 u32 exthdrlen = icsk->icsk_ext_hdr_len; 231 struct sockaddr_in sin; 232 233 if (__ipv6_only_sock(sk)) 234 return -ENETUNREACH; 235 236 sin.sin_family = AF_INET; 237 sin.sin_port = usin->sin6_port; 238 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3]; 239 240 icsk->icsk_af_ops = &ipv6_mapped; 241 if (sk_is_mptcp(sk)) 242 mptcpv6_handle_mapped(sk, true); 243 sk->sk_backlog_rcv = tcp_v4_do_rcv; 244 #ifdef CONFIG_TCP_MD5SIG 245 tp->af_specific = &tcp_sock_ipv6_mapped_specific; 246 #endif 247 248 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin)); 249 250 if (err) { 251 icsk->icsk_ext_hdr_len = exthdrlen; 252 icsk->icsk_af_ops = &ipv6_specific; 253 if (sk_is_mptcp(sk)) 254 mptcpv6_handle_mapped(sk, false); 255 sk->sk_backlog_rcv = tcp_v6_do_rcv; 256 #ifdef CONFIG_TCP_MD5SIG 257 tp->af_specific = &tcp_sock_ipv6_specific; 258 #endif 259 goto failure; 260 } 261 np->saddr = sk->sk_v6_rcv_saddr; 262 263 return err; 264 } 265 266 if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) 267 saddr = &sk->sk_v6_rcv_saddr; 268 269 fl6.flowi6_proto = IPPROTO_TCP; 270 fl6.daddr = sk->sk_v6_daddr; 271 fl6.saddr = saddr ? *saddr : np->saddr; 272 fl6.flowi6_oif = sk->sk_bound_dev_if; 273 fl6.flowi6_mark = sk->sk_mark; 274 fl6.fl6_dport = usin->sin6_port; 275 fl6.fl6_sport = inet->inet_sport; 276 fl6.flowi6_uid = sk->sk_uid; 277 278 opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk)); 279 final_p = fl6_update_dst(&fl6, opt, &final); 280 281 security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6)); 282 283 dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p); 284 if (IS_ERR(dst)) { 285 err = PTR_ERR(dst); 286 goto failure; 287 } 288 289 if (!saddr) { 290 saddr = &fl6.saddr; 291 sk->sk_v6_rcv_saddr = *saddr; 292 } 293 294 /* set the source address */ 295 np->saddr = *saddr; 296 inet->inet_rcv_saddr = LOOPBACK4_IPV6; 297 298 sk->sk_gso_type = SKB_GSO_TCPV6; 299 ip6_dst_store(sk, dst, NULL, NULL); 300 301 icsk->icsk_ext_hdr_len = 0; 302 if (opt) 303 icsk->icsk_ext_hdr_len = opt->opt_flen + 304 opt->opt_nflen; 305 306 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); 307 308 inet->inet_dport = usin->sin6_port; 309 310 tcp_set_state(sk, TCP_SYN_SENT); 311 err = inet6_hash_connect(tcp_death_row, sk); 312 if (err) 313 goto late_failure; 314 315 sk_set_txhash(sk); 316 317 if (likely(!tp->repair)) { 318 if (!tp->write_seq) 319 WRITE_ONCE(tp->write_seq, 320 secure_tcpv6_seq(np->saddr.s6_addr32, 321 sk->sk_v6_daddr.s6_addr32, 322 inet->inet_sport, 323 inet->inet_dport)); 324 tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk), 325 np->saddr.s6_addr32, 326 sk->sk_v6_daddr.s6_addr32); 327 } 328 329 if (tcp_fastopen_defer_connect(sk, &err)) 330 return err; 331 if (err) 332 goto late_failure; 333 334 err = tcp_connect(sk); 335 if (err) 336 goto late_failure; 337 338 return 0; 339 340 late_failure: 341 tcp_set_state(sk, TCP_CLOSE); 342 failure: 343 
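	/* Connection setup failed: forget the destination port and clear the
	 * cached route capabilities before returning the error to the caller.
	 */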
	inet->inet_dport = 0;
	sk->sk_route_caps = 0;
	return err;
}

static void tcp_v6_mtu_reduced(struct sock *sk)
{
	struct dst_entry *dst;
	u32 mtu;

	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
		return;

	mtu = READ_ONCE(tcp_sk(sk)->mtu_info);

	/* Drop requests trying to increase our current mss.
	 * Check done in __ip6_rt_update_pmtu() is too late.
	 */
	if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
		return;

	dst = inet6_csk_update_pmtu(sk, mtu);
	if (!dst)
		return;

	if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
		tcp_sync_mss(sk, dst_mtu(dst));
		tcp_simple_retransmit(sk);
	}
}

static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		      u8 type, u8 code, int offset, __be32 info)
{
	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
	const struct tcphdr *th = (struct tcphdr *)(skb->data + offset);
	struct net *net = dev_net(skb->dev);
	struct request_sock *fastopen;
	struct ipv6_pinfo *np;
	struct tcp_sock *tp;
	__u32 seq, snd_una;
	struct sock *sk;
	bool fatal;
	int err;

	sk = __inet6_lookup_established(net, &tcp_hashinfo,
					&hdr->daddr, th->dest,
					&hdr->saddr, ntohs(th->source),
					skb->dev->ifindex, inet6_sdif(skb));

	if (!sk) {
		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
				  ICMP6_MIB_INERRORS);
		return -ENOENT;
	}

	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return 0;
	}
	seq = ntohl(th->seq);
	fatal = icmpv6_err_convert(type, code, &err);
	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		tcp_req_err(sk, seq, fatal);
		return 0;
	}

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (static_branch_unlikely(&ip6_min_hopcount)) {
		/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
		if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
			goto out;
		}
	}

	tp = tcp_sk(sk);
	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
	fastopen = rcu_dereference(tp->fastopen_rsk);
	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, snd_una, tp->snd_nxt)) {
		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = tcp_inet6_sk(sk);

	if (type == NDISC_REDIRECT) {
		if (!sock_owned_by_user(sk)) {
			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);

			if (dst)
				dst->ops->redirect(dst, sk, skb);
		}
		goto out;
	}

	if (type == ICMPV6_PKT_TOOBIG) {
		u32 mtu = ntohl(info);

		/* We are not interested in TCP_LISTEN and open_requests
		 * (SYN-ACKs sent out by Linux are always < 576 bytes so
		 * they should go through unfragmented).
		 */
		if (sk->sk_state == TCP_LISTEN)
			goto out;

		if (!ip6_sk_accept_pmtu(sk))
			goto out;

		if (mtu < IPV6_MIN_MTU)
			goto out;

		WRITE_ONCE(tp->mtu_info, mtu);

		if (!sock_owned_by_user(sk))
			tcp_v6_mtu_reduced(sk);
		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
					   &sk->sk_tsq_flags))
			sock_hold(sk);
		goto out;
	}


	/* Might be for a request_sock */
	switch (sk->sk_state) {
	case TCP_SYN_SENT:
	case TCP_SYN_RECV:
		/* Only in fast or simultaneous open. If a fast open socket is
		 * already accepted it is treated as a connected one below.
		 */
		if (fastopen && !fastopen->sk)
			break;

		ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);

		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;
			sk_error_report(sk);	/* Wake people up to see the error (see connect in sock.c) */

			tcp_done(sk);
		} else
			sk->sk_err_soft = err;
		goto out;
	case TCP_LISTEN:
		break;
	default:
		/* check if this ICMP message allows revert of backoff.
		 * (see RFC 6069)
		 */
		if (!fastopen && type == ICMPV6_DEST_UNREACH &&
		    code == ICMPV6_NOROUTE)
			tcp_ld_RTO_revert(sk, seq);
	}

	if (!sock_owned_by_user(sk) && np->recverr) {
		sk->sk_err = err;
		sk_error_report(sk);
	} else
		sk->sk_err_soft = err;

out:
	bh_unlock_sock(sk);
	sock_put(sk);
	return 0;
}


static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
			      struct flowi *fl,
			      struct request_sock *req,
			      struct tcp_fastopen_cookie *foc,
			      enum tcp_synack_type synack_type,
			      struct sk_buff *syn_skb)
{
	struct inet_request_sock *ireq = inet_rsk(req);
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct ipv6_txoptions *opt;
	struct flowi6 *fl6 = &fl->u.ip6;
	struct sk_buff *skb;
	int err = -ENOMEM;
	u8 tclass;

	/* First, grab a route. */
	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
					       IPPROTO_TCP)) == NULL)
		goto done;

	skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);

	if (skb) {
		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
				    &ireq->ir_v6_rmt_addr);

		fl6->daddr = ireq->ir_v6_rmt_addr;
		if (np->repflow && ireq->pktopts)
			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));

		tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
				(tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
				(np->tclass & INET_ECN_MASK) :
				np->tclass;

		if (!INET_ECN_is_capable(tclass) &&
		    tcp_bpf_ca_needs_ecn((struct sock *)req))
			tclass |= INET_ECN_ECT_0;

		rcu_read_lock();
		opt = ireq->ipv6_opt;
		if (!opt)
			opt = rcu_dereference(np->opt);
		err = ip6_xmit(sk, skb, fl6, skb->mark ?
: sk->sk_mark, opt, 562 tclass, sk->sk_priority); 563 rcu_read_unlock(); 564 err = net_xmit_eval(err); 565 } 566 567 done: 568 return err; 569 } 570 571 572 static void tcp_v6_reqsk_destructor(struct request_sock *req) 573 { 574 kfree(inet_rsk(req)->ipv6_opt); 575 consume_skb(inet_rsk(req)->pktopts); 576 } 577 578 #ifdef CONFIG_TCP_MD5SIG 579 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk, 580 const struct in6_addr *addr, 581 int l3index) 582 { 583 return tcp_md5_do_lookup(sk, l3index, 584 (union tcp_md5_addr *)addr, AF_INET6); 585 } 586 587 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk, 588 const struct sock *addr_sk) 589 { 590 int l3index; 591 592 l3index = l3mdev_master_ifindex_by_index(sock_net(sk), 593 addr_sk->sk_bound_dev_if); 594 return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr, 595 l3index); 596 } 597 598 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname, 599 sockptr_t optval, int optlen) 600 { 601 struct tcp_md5sig cmd; 602 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr; 603 int l3index = 0; 604 u8 prefixlen; 605 u8 flags; 606 607 if (optlen < sizeof(cmd)) 608 return -EINVAL; 609 610 if (copy_from_sockptr(&cmd, optval, sizeof(cmd))) 611 return -EFAULT; 612 613 if (sin6->sin6_family != AF_INET6) 614 return -EINVAL; 615 616 flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX; 617 618 if (optname == TCP_MD5SIG_EXT && 619 cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) { 620 prefixlen = cmd.tcpm_prefixlen; 621 if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) && 622 prefixlen > 32)) 623 return -EINVAL; 624 } else { 625 prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128; 626 } 627 628 if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex && 629 cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) { 630 struct net_device *dev; 631 632 rcu_read_lock(); 633 dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex); 634 if (dev && netif_is_l3_master(dev)) 635 l3index = dev->ifindex; 636 rcu_read_unlock(); 637 638 /* ok to reference set/not set outside of rcu; 639 * right now device MUST be an L3 master 640 */ 641 if (!dev || !l3index) 642 return -EINVAL; 643 } 644 645 if (!cmd.tcpm_keylen) { 646 if (ipv6_addr_v4mapped(&sin6->sin6_addr)) 647 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3], 648 AF_INET, prefixlen, 649 l3index, flags); 650 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr, 651 AF_INET6, prefixlen, l3index, flags); 652 } 653 654 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN) 655 return -EINVAL; 656 657 if (ipv6_addr_v4mapped(&sin6->sin6_addr)) 658 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3], 659 AF_INET, prefixlen, l3index, flags, 660 cmd.tcpm_key, cmd.tcpm_keylen, 661 GFP_KERNEL); 662 663 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr, 664 AF_INET6, prefixlen, l3index, flags, 665 cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL); 666 } 667 668 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp, 669 const struct in6_addr *daddr, 670 const struct in6_addr *saddr, 671 const struct tcphdr *th, int nbytes) 672 { 673 struct tcp6_pseudohdr *bp; 674 struct scatterlist sg; 675 struct tcphdr *_th; 676 677 bp = hp->scratch; 678 /* 1. 
TCP pseudo-header (RFC2460) */ 679 bp->saddr = *saddr; 680 bp->daddr = *daddr; 681 bp->protocol = cpu_to_be32(IPPROTO_TCP); 682 bp->len = cpu_to_be32(nbytes); 683 684 _th = (struct tcphdr *)(bp + 1); 685 memcpy(_th, th, sizeof(*th)); 686 _th->check = 0; 687 688 sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th)); 689 ahash_request_set_crypt(hp->md5_req, &sg, NULL, 690 sizeof(*bp) + sizeof(*th)); 691 return crypto_ahash_update(hp->md5_req); 692 } 693 694 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, 695 const struct in6_addr *daddr, struct in6_addr *saddr, 696 const struct tcphdr *th) 697 { 698 struct tcp_md5sig_pool *hp; 699 struct ahash_request *req; 700 701 hp = tcp_get_md5sig_pool(); 702 if (!hp) 703 goto clear_hash_noput; 704 req = hp->md5_req; 705 706 if (crypto_ahash_init(req)) 707 goto clear_hash; 708 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2)) 709 goto clear_hash; 710 if (tcp_md5_hash_key(hp, key)) 711 goto clear_hash; 712 ahash_request_set_crypt(req, NULL, md5_hash, 0); 713 if (crypto_ahash_final(req)) 714 goto clear_hash; 715 716 tcp_put_md5sig_pool(); 717 return 0; 718 719 clear_hash: 720 tcp_put_md5sig_pool(); 721 clear_hash_noput: 722 memset(md5_hash, 0, 16); 723 return 1; 724 } 725 726 static int tcp_v6_md5_hash_skb(char *md5_hash, 727 const struct tcp_md5sig_key *key, 728 const struct sock *sk, 729 const struct sk_buff *skb) 730 { 731 const struct in6_addr *saddr, *daddr; 732 struct tcp_md5sig_pool *hp; 733 struct ahash_request *req; 734 const struct tcphdr *th = tcp_hdr(skb); 735 736 if (sk) { /* valid for establish/request sockets */ 737 saddr = &sk->sk_v6_rcv_saddr; 738 daddr = &sk->sk_v6_daddr; 739 } else { 740 const struct ipv6hdr *ip6h = ipv6_hdr(skb); 741 saddr = &ip6h->saddr; 742 daddr = &ip6h->daddr; 743 } 744 745 hp = tcp_get_md5sig_pool(); 746 if (!hp) 747 goto clear_hash_noput; 748 req = hp->md5_req; 749 750 if (crypto_ahash_init(req)) 751 goto clear_hash; 752 753 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len)) 754 goto clear_hash; 755 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2)) 756 goto clear_hash; 757 if (tcp_md5_hash_key(hp, key)) 758 goto clear_hash; 759 ahash_request_set_crypt(req, NULL, md5_hash, 0); 760 if (crypto_ahash_final(req)) 761 goto clear_hash; 762 763 tcp_put_md5sig_pool(); 764 return 0; 765 766 clear_hash: 767 tcp_put_md5sig_pool(); 768 clear_hash_noput: 769 memset(md5_hash, 0, 16); 770 return 1; 771 } 772 773 #endif 774 775 static bool tcp_v6_inbound_md5_hash(const struct sock *sk, 776 const struct sk_buff *skb, 777 int dif, int sdif) 778 { 779 #ifdef CONFIG_TCP_MD5SIG 780 const __u8 *hash_location = NULL; 781 struct tcp_md5sig_key *hash_expected; 782 const struct ipv6hdr *ip6h = ipv6_hdr(skb); 783 const struct tcphdr *th = tcp_hdr(skb); 784 int genhash, l3index; 785 u8 newhash[16]; 786 787 /* sdif set, means packet ingressed via a device 788 * in an L3 domain and dif is set to the l3mdev 789 */ 790 l3index = sdif ? dif : 0; 791 792 hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr, l3index); 793 hash_location = tcp_parse_md5sig_option(th); 794 795 /* We've parsed the options - do we have a hash? 
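	 * hash_expected is the MD5 key (if any) configured on this socket for
	 * the peer address; hash_location points at the MD5 option carried in
	 * the incoming segment, if present.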
*/ 796 if (!hash_expected && !hash_location) 797 return false; 798 799 if (hash_expected && !hash_location) { 800 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); 801 return true; 802 } 803 804 if (!hash_expected && hash_location) { 805 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED); 806 return true; 807 } 808 809 /* check the signature */ 810 genhash = tcp_v6_md5_hash_skb(newhash, 811 hash_expected, 812 NULL, skb); 813 814 if (genhash || memcmp(hash_location, newhash, 16) != 0) { 815 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE); 816 net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u L3 index %d\n", 817 genhash ? "failed" : "mismatch", 818 &ip6h->saddr, ntohs(th->source), 819 &ip6h->daddr, ntohs(th->dest), l3index); 820 return true; 821 } 822 #endif 823 return false; 824 } 825 826 static void tcp_v6_init_req(struct request_sock *req, 827 const struct sock *sk_listener, 828 struct sk_buff *skb) 829 { 830 bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags); 831 struct inet_request_sock *ireq = inet_rsk(req); 832 const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener); 833 834 ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; 835 ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; 836 837 /* So that link locals have meaning */ 838 if ((!sk_listener->sk_bound_dev_if || l3_slave) && 839 ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) 840 ireq->ir_iif = tcp_v6_iif(skb); 841 842 if (!TCP_SKB_CB(skb)->tcp_tw_isn && 843 (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) || 844 np->rxopt.bits.rxinfo || 845 np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || 846 np->rxopt.bits.rxohlim || np->repflow)) { 847 refcount_inc(&skb->users); 848 ireq->pktopts = skb; 849 } 850 } 851 852 static struct dst_entry *tcp_v6_route_req(const struct sock *sk, 853 struct sk_buff *skb, 854 struct flowi *fl, 855 struct request_sock *req) 856 { 857 tcp_v6_init_req(req, sk, skb); 858 859 if (security_inet_conn_request(sk, skb, req)) 860 return NULL; 861 862 return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP); 863 } 864 865 struct request_sock_ops tcp6_request_sock_ops __read_mostly = { 866 .family = AF_INET6, 867 .obj_size = sizeof(struct tcp6_request_sock), 868 .rtx_syn_ack = tcp_rtx_synack, 869 .send_ack = tcp_v6_reqsk_send_ack, 870 .destructor = tcp_v6_reqsk_destructor, 871 .send_reset = tcp_v6_send_reset, 872 .syn_ack_timeout = tcp_syn_ack_timeout, 873 }; 874 875 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { 876 .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - 877 sizeof(struct ipv6hdr), 878 #ifdef CONFIG_TCP_MD5SIG 879 .req_md5_lookup = tcp_v6_md5_lookup, 880 .calc_md5_hash = tcp_v6_md5_hash_skb, 881 #endif 882 #ifdef CONFIG_SYN_COOKIES 883 .cookie_init_seq = cookie_v6_init_sequence, 884 #endif 885 .route_req = tcp_v6_route_req, 886 .init_seq = tcp_v6_init_seq, 887 .init_ts_off = tcp_v6_init_ts_off, 888 .send_synack = tcp_v6_send_synack, 889 }; 890 891 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq, 892 u32 ack, u32 win, u32 tsval, u32 tsecr, 893 int oif, struct tcp_md5sig_key *key, int rst, 894 u8 tclass, __be32 label, u32 priority) 895 { 896 const struct tcphdr *th = tcp_hdr(skb); 897 struct tcphdr *t1; 898 struct sk_buff *buff; 899 struct flowi6 fl6; 900 struct net *net = sk ? 
sock_net(sk) : dev_net(skb_dst(skb)->dev); 901 struct sock *ctl_sk = net->ipv6.tcp_sk; 902 unsigned int tot_len = sizeof(struct tcphdr); 903 __be32 mrst = 0, *topt; 904 struct dst_entry *dst; 905 __u32 mark = 0; 906 907 if (tsecr) 908 tot_len += TCPOLEN_TSTAMP_ALIGNED; 909 #ifdef CONFIG_TCP_MD5SIG 910 if (key) 911 tot_len += TCPOLEN_MD5SIG_ALIGNED; 912 #endif 913 914 #ifdef CONFIG_MPTCP 915 if (rst && !key) { 916 mrst = mptcp_reset_option(skb); 917 918 if (mrst) 919 tot_len += sizeof(__be32); 920 } 921 #endif 922 923 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len, 924 GFP_ATOMIC); 925 if (!buff) 926 return; 927 928 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len); 929 930 t1 = skb_push(buff, tot_len); 931 skb_reset_transport_header(buff); 932 933 /* Swap the send and the receive. */ 934 memset(t1, 0, sizeof(*t1)); 935 t1->dest = th->source; 936 t1->source = th->dest; 937 t1->doff = tot_len / 4; 938 t1->seq = htonl(seq); 939 t1->ack_seq = htonl(ack); 940 t1->ack = !rst || !th->ack; 941 t1->rst = rst; 942 t1->window = htons(win); 943 944 topt = (__be32 *)(t1 + 1); 945 946 if (tsecr) { 947 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | 948 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); 949 *topt++ = htonl(tsval); 950 *topt++ = htonl(tsecr); 951 } 952 953 if (mrst) 954 *topt++ = mrst; 955 956 #ifdef CONFIG_TCP_MD5SIG 957 if (key) { 958 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | 959 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG); 960 tcp_v6_md5_hash_hdr((__u8 *)topt, key, 961 &ipv6_hdr(skb)->saddr, 962 &ipv6_hdr(skb)->daddr, t1); 963 } 964 #endif 965 966 memset(&fl6, 0, sizeof(fl6)); 967 fl6.daddr = ipv6_hdr(skb)->saddr; 968 fl6.saddr = ipv6_hdr(skb)->daddr; 969 fl6.flowlabel = label; 970 971 buff->ip_summed = CHECKSUM_PARTIAL; 972 973 __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr); 974 975 fl6.flowi6_proto = IPPROTO_TCP; 976 if (rt6_need_strict(&fl6.daddr) && !oif) 977 fl6.flowi6_oif = tcp_v6_iif(skb); 978 else { 979 if (!oif && netif_index_is_l3_master(net, skb->skb_iif)) 980 oif = skb->skb_iif; 981 982 fl6.flowi6_oif = oif; 983 } 984 985 if (sk) { 986 if (sk->sk_state == TCP_TIME_WAIT) { 987 mark = inet_twsk(sk)->tw_mark; 988 /* autoflowlabel relies on buff->hash */ 989 skb_set_hash(buff, inet_twsk(sk)->tw_txhash, 990 PKT_HASH_TYPE_L4); 991 } else { 992 mark = sk->sk_mark; 993 } 994 buff->tstamp = tcp_transmit_time(sk); 995 } 996 fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark; 997 fl6.fl6_dport = t1->dest; 998 fl6.fl6_sport = t1->source; 999 fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? 
			     sk : NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));

	/* Pass a socket to ip6_dst_lookup, whether or not it is for a RST;
	 * the underlying function will use it to retrieve the network
	 * namespace.
	 */
	dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL);
	if (!IS_ERR(dst)) {
		skb_dst_set(buff, dst);
		ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
			 tclass & ~INET_ECN_MASK, priority);
		TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
		if (rst)
			TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
		return;
	}

	kfree_skb(buff);
}

static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
	u32 seq = 0, ack_seq = 0;
	struct tcp_md5sig_key *key = NULL;
#ifdef CONFIG_TCP_MD5SIG
	const __u8 *hash_location = NULL;
	unsigned char newhash[16];
	int genhash;
	struct sock *sk1 = NULL;
#endif
	__be32 label = 0;
	u32 priority = 0;
	struct net *net;
	int oif = 0;

	if (th->rst)
		return;

	/* If sk not NULL, it means we did a successful lookup and incoming
	 * route had to be correct. prequeue might have dropped our dst.
	 */
	if (!sk && !ipv6_unicast_destination(skb))
		return;

	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
#ifdef CONFIG_TCP_MD5SIG
	rcu_read_lock();
	hash_location = tcp_parse_md5sig_option(th);
	if (sk && sk_fullsock(sk)) {
		int l3index;

		/* sdif set, means packet ingressed via a device
		 * in an L3 domain and inet_iif is set to it.
		 */
		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
		key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
	} else if (hash_location) {
		int dif = tcp_v6_iif_l3_slave(skb);
		int sdif = tcp_v6_sdif(skb);
		int l3index;

		/*
		 * The active side is lost. Try to find the listening socket
		 * through the source port, and then find the md5 key through
		 * that listening socket. We do not loosen security here:
		 * the incoming packet is checked with the md5 hash of the
		 * key we find, and no RST is generated if the md5 hash
		 * doesn't match.
		 */
		sk1 = inet6_lookup_listener(net,
					    &tcp_hashinfo, NULL, 0,
					    &ipv6h->saddr,
					    th->source, &ipv6h->daddr,
					    ntohs(th->source), dif, sdif);
		if (!sk1)
			goto out;

		/* sdif set, means packet ingressed via a device
		 * in an L3 domain and dif is set to it.
		 */
		l3index = tcp_v6_sdif(skb) ?
dif : 0; 1082 1083 key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index); 1084 if (!key) 1085 goto out; 1086 1087 genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb); 1088 if (genhash || memcmp(hash_location, newhash, 16) != 0) 1089 goto out; 1090 } 1091 #endif 1092 1093 if (th->ack) 1094 seq = ntohl(th->ack_seq); 1095 else 1096 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len - 1097 (th->doff << 2); 1098 1099 if (sk) { 1100 oif = sk->sk_bound_dev_if; 1101 if (sk_fullsock(sk)) { 1102 const struct ipv6_pinfo *np = tcp_inet6_sk(sk); 1103 1104 trace_tcp_send_reset(sk, skb); 1105 if (np->repflow) 1106 label = ip6_flowlabel(ipv6h); 1107 priority = sk->sk_priority; 1108 } 1109 if (sk->sk_state == TCP_TIME_WAIT) { 1110 label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel); 1111 priority = inet_twsk(sk)->tw_priority; 1112 } 1113 } else { 1114 if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET) 1115 label = ip6_flowlabel(ipv6h); 1116 } 1117 1118 tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 1119 ipv6_get_dsfield(ipv6h), label, priority); 1120 1121 #ifdef CONFIG_TCP_MD5SIG 1122 out: 1123 rcu_read_unlock(); 1124 #endif 1125 } 1126 1127 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq, 1128 u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, 1129 struct tcp_md5sig_key *key, u8 tclass, 1130 __be32 label, u32 priority) 1131 { 1132 tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0, 1133 tclass, label, priority); 1134 } 1135 1136 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) 1137 { 1138 struct inet_timewait_sock *tw = inet_twsk(sk); 1139 struct tcp_timewait_sock *tcptw = tcp_twsk(sk); 1140 1141 tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, 1142 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, 1143 tcp_time_stamp_raw() + tcptw->tw_ts_offset, 1144 tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw), 1145 tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority); 1146 1147 inet_twsk_put(tw); 1148 } 1149 1150 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, 1151 struct request_sock *req) 1152 { 1153 int l3index; 1154 1155 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; 1156 1157 /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV 1158 * sk->sk_state == TCP_SYN_RECV -> for Fast Open. 1159 */ 1160 /* RFC 7323 2.3 1161 * The window field (SEG.WND) of every outgoing segment, with the 1162 * exception of <SYN> segments, MUST be right-shifted by 1163 * Rcv.Wind.Shift bits: 1164 */ 1165 tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ? 
1166 tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, 1167 tcp_rsk(req)->rcv_nxt, 1168 req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, 1169 tcp_time_stamp_raw() + tcp_rsk(req)->ts_off, 1170 req->ts_recent, sk->sk_bound_dev_if, 1171 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index), 1172 ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority); 1173 } 1174 1175 1176 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb) 1177 { 1178 #ifdef CONFIG_SYN_COOKIES 1179 const struct tcphdr *th = tcp_hdr(skb); 1180 1181 if (!th->syn) 1182 sk = cookie_v6_check(sk, skb); 1183 #endif 1184 return sk; 1185 } 1186 1187 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph, 1188 struct tcphdr *th, u32 *cookie) 1189 { 1190 u16 mss = 0; 1191 #ifdef CONFIG_SYN_COOKIES 1192 mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops, 1193 &tcp_request_sock_ipv6_ops, sk, th); 1194 if (mss) { 1195 *cookie = __cookie_v6_init_sequence(iph, th, &mss); 1196 tcp_synq_overflow(sk); 1197 } 1198 #endif 1199 return mss; 1200 } 1201 1202 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) 1203 { 1204 if (skb->protocol == htons(ETH_P_IP)) 1205 return tcp_v4_conn_request(sk, skb); 1206 1207 if (!ipv6_unicast_destination(skb)) 1208 goto drop; 1209 1210 if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) { 1211 __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS); 1212 return 0; 1213 } 1214 1215 return tcp_conn_request(&tcp6_request_sock_ops, 1216 &tcp_request_sock_ipv6_ops, sk, skb); 1217 1218 drop: 1219 tcp_listendrop(sk); 1220 return 0; /* don't send reset */ 1221 } 1222 1223 static void tcp_v6_restore_cb(struct sk_buff *skb) 1224 { 1225 /* We need to move header back to the beginning if xfrm6_policy_check() 1226 * and tcp_v6_fill_cb() are going to be called again. 1227 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there. 
1228 */ 1229 memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6, 1230 sizeof(struct inet6_skb_parm)); 1231 } 1232 1233 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, 1234 struct request_sock *req, 1235 struct dst_entry *dst, 1236 struct request_sock *req_unhash, 1237 bool *own_req) 1238 { 1239 struct inet_request_sock *ireq; 1240 struct ipv6_pinfo *newnp; 1241 const struct ipv6_pinfo *np = tcp_inet6_sk(sk); 1242 struct ipv6_txoptions *opt; 1243 struct inet_sock *newinet; 1244 bool found_dup_sk = false; 1245 struct tcp_sock *newtp; 1246 struct sock *newsk; 1247 #ifdef CONFIG_TCP_MD5SIG 1248 struct tcp_md5sig_key *key; 1249 int l3index; 1250 #endif 1251 struct flowi6 fl6; 1252 1253 if (skb->protocol == htons(ETH_P_IP)) { 1254 /* 1255 * v6 mapped 1256 */ 1257 1258 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst, 1259 req_unhash, own_req); 1260 1261 if (!newsk) 1262 return NULL; 1263 1264 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk); 1265 1266 newnp = tcp_inet6_sk(newsk); 1267 newtp = tcp_sk(newsk); 1268 1269 memcpy(newnp, np, sizeof(struct ipv6_pinfo)); 1270 1271 newnp->saddr = newsk->sk_v6_rcv_saddr; 1272 1273 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped; 1274 if (sk_is_mptcp(newsk)) 1275 mptcpv6_handle_mapped(newsk, true); 1276 newsk->sk_backlog_rcv = tcp_v4_do_rcv; 1277 #ifdef CONFIG_TCP_MD5SIG 1278 newtp->af_specific = &tcp_sock_ipv6_mapped_specific; 1279 #endif 1280 1281 newnp->ipv6_mc_list = NULL; 1282 newnp->ipv6_ac_list = NULL; 1283 newnp->ipv6_fl_list = NULL; 1284 newnp->pktoptions = NULL; 1285 newnp->opt = NULL; 1286 newnp->mcast_oif = inet_iif(skb); 1287 newnp->mcast_hops = ip_hdr(skb)->ttl; 1288 newnp->rcv_flowinfo = 0; 1289 if (np->repflow) 1290 newnp->flow_label = 0; 1291 1292 /* 1293 * No need to charge this sock to the relevant IPv6 refcnt debug socks count 1294 * here, tcp_create_openreq_child now does this for us, see the comment in 1295 * that function for the gory details. -acme 1296 */ 1297 1298 /* It is tricky place. Until this moment IPv4 tcp 1299 worked with IPv6 icsk.icsk_af_ops. 1300 Sync it now. 1301 */ 1302 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie); 1303 1304 return newsk; 1305 } 1306 1307 ireq = inet_rsk(req); 1308 1309 if (sk_acceptq_is_full(sk)) 1310 goto out_overflow; 1311 1312 if (!dst) { 1313 dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP); 1314 if (!dst) 1315 goto out; 1316 } 1317 1318 newsk = tcp_create_openreq_child(sk, req, skb); 1319 if (!newsk) 1320 goto out_nonewsk; 1321 1322 /* 1323 * No need to charge this sock to the relevant IPv6 refcnt debug socks 1324 * count here, tcp_create_openreq_child now does this for us, see the 1325 * comment in that function for the gory details. -acme 1326 */ 1327 1328 newsk->sk_gso_type = SKB_GSO_TCPV6; 1329 ip6_dst_store(newsk, dst, NULL, NULL); 1330 inet6_sk_rx_dst_set(newsk, skb); 1331 1332 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk); 1333 1334 newtp = tcp_sk(newsk); 1335 newinet = inet_sk(newsk); 1336 newnp = tcp_inet6_sk(newsk); 1337 1338 memcpy(newnp, np, sizeof(struct ipv6_pinfo)); 1339 1340 newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr; 1341 newnp->saddr = ireq->ir_v6_loc_addr; 1342 newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr; 1343 newsk->sk_bound_dev_if = ireq->ir_iif; 1344 1345 /* Now IPv6 options... 1346 1347 First: no IPv4 options. 
	 */
	newinet->inet_opt = NULL;
	newnp->ipv6_mc_list = NULL;
	newnp->ipv6_ac_list = NULL;
	newnp->ipv6_fl_list = NULL;

	/* Clone RX bits */
	newnp->rxopt.all = np->rxopt.all;

	newnp->pktoptions = NULL;
	newnp->opt	  = NULL;
	newnp->mcast_oif  = tcp_v6_iif(skb);
	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
	newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
	if (np->repflow)
		newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));

	/* Set ToS of the new socket based upon the value of incoming SYN.
	 * ECT bits are set later in tcp_init_transfer().
	 */
	if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)
		newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;

	/* Clone native IPv6 options from the listening socket (if any).
	 *
	 * Yes, keeping a reference count would be much more clever, but we
	 * do one more thing here: reattach optmem to newsk.
	 */
	opt = ireq->ipv6_opt;
	if (!opt)
		opt = rcu_dereference(np->opt);
	if (opt) {
		opt = ipv6_dup_options(newsk, opt);
		RCU_INIT_POINTER(newnp->opt, opt);
	}
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (opt)
		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
						    opt->opt_flen;

	tcp_ca_openreq_child(newsk, dst);

	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));

	tcp_initialize_rcv_mss(newsk);

	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;

#ifdef CONFIG_TCP_MD5SIG
	l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);

	/* Copy over the MD5 key from the original socket */
	key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
	if (key) {
		/* We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
			       AF_INET6, 128, l3index, key->flags, key->key, key->keylen,
			       sk_gfp_mask(sk, GFP_ATOMIC));
	}
#endif

	if (__inet_inherit_port(sk, newsk) < 0) {
		inet_csk_prepare_forced_close(newsk);
		tcp_done(newsk);
		goto out;
	}
	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
				       &found_dup_sk);
	if (*own_req) {
		tcp_move_syn(newtp, req);

		/* Clone pktoptions received with SYN, if we own the req */
		if (ireq->pktopts) {
			newnp->pktoptions = skb_clone(ireq->pktopts,
						      sk_gfp_mask(sk, GFP_ATOMIC));
			consume_skb(ireq->pktopts);
			ireq->pktopts = NULL;
			if (newnp->pktoptions) {
				tcp_v6_restore_cb(newnp->pktoptions);
				skb_set_owner_r(newnp->pktoptions, newsk);
			}
		}
	} else {
		if (!req_unhash && found_dup_sk) {
			/* This code path should only be executed in the
			 * syncookie case
			 */
			bh_unlock_sock(newsk);
			sock_put(newsk);
			newsk = NULL;
		}
	}

	return newsk;

out_overflow:
	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
out_nonewsk:
	dst_release(dst);
out:
	tcp_listendrop(sk);
	return NULL;
}

INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
							    u32));
/* The socket must have its spinlock held when we get
 * here, unless it is a TCP_LISTEN socket.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct sk_buff *opt_skb = NULL;
	struct tcp_sock *tp;

	/* Imagine: socket is IPv6. IPv4 packet arrives, goes to the IPv4
	 * receive handler and is backlogged. From the backlog it always
	 * comes here. Kerboom...
	 * Fortunately, tcp_rcv_established and rcv_established handle them
	 * correctly, but that is not the case with tcp_v6_hnd_req and
	 * tcp_v6_send_reset(). --ANK
	 */

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_do_rcv(sk, skb);

	/*
	 * socket locking is here for SMP purposes as backlog rcv
	 * is currently called with bh processing disabled.
	 */

	/* Do Stevens' IPV6_PKTOPTIONS.
	 *
	 * Yes, guys, it is the only place in our code where we may make it
	 * not affect IPv4. The rest of the code is protocol independent,
	 * and I do not like the idea of uglifying IPv4.
	 *
	 * Actually, the whole idea behind IPV6_PKTOPTIONS does not look very
	 * well thought out. For now we latch the options received in the
	 * last packet enqueued by tcp. Feel free to propose a better
	 * solution. --ANK (980728)
	 */
	if (np->rxopt.all)
		opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		struct dst_entry *dst = sk->sk_rx_dst;

		sock_rps_save_rxhash(sk, skb);
		sk_mark_napi_id(sk, skb);
		if (dst) {
			if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
			    INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
					    dst, sk->sk_rx_dst_cookie) == NULL) {
				dst_release(dst);
				sk->sk_rx_dst = NULL;
			}
		}

		tcp_rcv_established(sk, skb);
		if (opt_skb)
			goto ipv6_pktoptions;
		return 0;
	}

	if (tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v6_cookie_check(sk, skb);

		if (!nsk)
			goto discard;

		if (nsk != sk) {
			if (tcp_child_process(sk, nsk, skb))
				goto reset;
			if (opt_skb)
				__kfree_skb(opt_skb);
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb);

	if (tcp_rcv_state_process(sk, skb))
		goto reset;
	if (opt_skb)
		goto ipv6_pktoptions;
	return 0;

reset:
	tcp_v6_send_reset(sk, skb);
discard:
	if (opt_skb)
		__kfree_skb(opt_skb);
	kfree_skb(skb);
	return 0;
csum_err:
	trace_tcp_bad_csum(skb);
	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
	goto discard;


ipv6_pktoptions:
	/* Do you ask, what is it?
	 *
	 * 1. skb was enqueued by tcp.
	 * 2. skb is added to the tail of the read queue, rather than out of order.
	 * 3. socket is not in passive state.
	 * 4. Finally, it really contains options, which the user wants to receive.
1573 */ 1574 tp = tcp_sk(sk); 1575 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt && 1576 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) { 1577 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo) 1578 np->mcast_oif = tcp_v6_iif(opt_skb); 1579 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) 1580 np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit; 1581 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass) 1582 np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb)); 1583 if (np->repflow) 1584 np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb)); 1585 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) { 1586 skb_set_owner_r(opt_skb, sk); 1587 tcp_v6_restore_cb(opt_skb); 1588 opt_skb = xchg(&np->pktoptions, opt_skb); 1589 } else { 1590 __kfree_skb(opt_skb); 1591 opt_skb = xchg(&np->pktoptions, NULL); 1592 } 1593 } 1594 1595 consume_skb(opt_skb); 1596 return 0; 1597 } 1598 1599 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr, 1600 const struct tcphdr *th) 1601 { 1602 /* This is tricky: we move IP6CB at its correct location into 1603 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because 1604 * _decode_session6() uses IP6CB(). 1605 * barrier() makes sure compiler won't play aliasing games. 1606 */ 1607 memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb), 1608 sizeof(struct inet6_skb_parm)); 1609 barrier(); 1610 1611 TCP_SKB_CB(skb)->seq = ntohl(th->seq); 1612 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + 1613 skb->len - th->doff*4); 1614 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); 1615 TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th); 1616 TCP_SKB_CB(skb)->tcp_tw_isn = 0; 1617 TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr); 1618 TCP_SKB_CB(skb)->sacked = 0; 1619 TCP_SKB_CB(skb)->has_rxtstamp = 1620 skb->tstamp || skb_hwtstamps(skb)->hwtstamp; 1621 } 1622 1623 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) 1624 { 1625 int sdif = inet6_sdif(skb); 1626 int dif = inet6_iif(skb); 1627 const struct tcphdr *th; 1628 const struct ipv6hdr *hdr; 1629 bool refcounted; 1630 struct sock *sk; 1631 int ret; 1632 struct net *net = dev_net(skb->dev); 1633 1634 if (skb->pkt_type != PACKET_HOST) 1635 goto discard_it; 1636 1637 /* 1638 * Count it even if it's bad. 
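	 * (TCP_MIB_INSEGS is bumped before any header or checksum checks.)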
1639 */ 1640 __TCP_INC_STATS(net, TCP_MIB_INSEGS); 1641 1642 if (!pskb_may_pull(skb, sizeof(struct tcphdr))) 1643 goto discard_it; 1644 1645 th = (const struct tcphdr *)skb->data; 1646 1647 if (unlikely(th->doff < sizeof(struct tcphdr)/4)) 1648 goto bad_packet; 1649 if (!pskb_may_pull(skb, th->doff*4)) 1650 goto discard_it; 1651 1652 if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo)) 1653 goto csum_error; 1654 1655 th = (const struct tcphdr *)skb->data; 1656 hdr = ipv6_hdr(skb); 1657 1658 lookup: 1659 sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), 1660 th->source, th->dest, inet6_iif(skb), sdif, 1661 &refcounted); 1662 if (!sk) 1663 goto no_tcp_socket; 1664 1665 process: 1666 if (sk->sk_state == TCP_TIME_WAIT) 1667 goto do_time_wait; 1668 1669 if (sk->sk_state == TCP_NEW_SYN_RECV) { 1670 struct request_sock *req = inet_reqsk(sk); 1671 bool req_stolen = false; 1672 struct sock *nsk; 1673 1674 sk = req->rsk_listener; 1675 if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif)) { 1676 sk_drops_add(sk, skb); 1677 reqsk_put(req); 1678 goto discard_it; 1679 } 1680 if (tcp_checksum_complete(skb)) { 1681 reqsk_put(req); 1682 goto csum_error; 1683 } 1684 if (unlikely(sk->sk_state != TCP_LISTEN)) { 1685 nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb); 1686 if (!nsk) { 1687 inet_csk_reqsk_queue_drop_and_put(sk, req); 1688 goto lookup; 1689 } 1690 sk = nsk; 1691 /* reuseport_migrate_sock() has already held one sk_refcnt 1692 * before returning. 1693 */ 1694 } else { 1695 sock_hold(sk); 1696 } 1697 refcounted = true; 1698 nsk = NULL; 1699 if (!tcp_filter(sk, skb)) { 1700 th = (const struct tcphdr *)skb->data; 1701 hdr = ipv6_hdr(skb); 1702 tcp_v6_fill_cb(skb, hdr, th); 1703 nsk = tcp_check_req(sk, skb, req, false, &req_stolen); 1704 } 1705 if (!nsk) { 1706 reqsk_put(req); 1707 if (req_stolen) { 1708 /* Another cpu got exclusive access to req 1709 * and created a full blown socket. 1710 * Try to feed this packet to this socket 1711 * instead of discarding it. 1712 */ 1713 tcp_v6_restore_cb(skb); 1714 sock_put(sk); 1715 goto lookup; 1716 } 1717 goto discard_and_relse; 1718 } 1719 if (nsk == sk) { 1720 reqsk_put(req); 1721 tcp_v6_restore_cb(skb); 1722 } else if (tcp_child_process(sk, nsk, skb)) { 1723 tcp_v6_send_reset(nsk, skb); 1724 goto discard_and_relse; 1725 } else { 1726 sock_put(sk); 1727 return 0; 1728 } 1729 } 1730 1731 if (static_branch_unlikely(&ip6_min_hopcount)) { 1732 /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */ 1733 if (hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) { 1734 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); 1735 goto discard_and_relse; 1736 } 1737 } 1738 1739 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) 1740 goto discard_and_relse; 1741 1742 if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif)) 1743 goto discard_and_relse; 1744 1745 if (tcp_filter(sk, skb)) 1746 goto discard_and_relse; 1747 th = (const struct tcphdr *)skb->data; 1748 hdr = ipv6_hdr(skb); 1749 tcp_v6_fill_cb(skb, hdr, th); 1750 1751 skb->dev = NULL; 1752 1753 if (sk->sk_state == TCP_LISTEN) { 1754 ret = tcp_v6_do_rcv(sk, skb); 1755 goto put_and_return; 1756 } 1757 1758 sk_incoming_cpu_update(sk); 1759 1760 bh_lock_sock_nested(sk); 1761 tcp_segs_in(tcp_sk(sk), skb); 1762 ret = 0; 1763 if (!sock_owned_by_user(sk)) { 1764 ret = tcp_v6_do_rcv(sk, skb); 1765 } else { 1766 if (tcp_add_backlog(sk, skb)) 1767 goto discard_and_relse; 1768 } 1769 bh_unlock_sock(sk); 1770 put_and_return: 1771 if (refcounted) 1772 sock_put(sk); 1773 return ret ? 
-1 : 0; 1774 1775 no_tcp_socket: 1776 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) 1777 goto discard_it; 1778 1779 tcp_v6_fill_cb(skb, hdr, th); 1780 1781 if (tcp_checksum_complete(skb)) { 1782 csum_error: 1783 trace_tcp_bad_csum(skb); 1784 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS); 1785 bad_packet: 1786 __TCP_INC_STATS(net, TCP_MIB_INERRS); 1787 } else { 1788 tcp_v6_send_reset(NULL, skb); 1789 } 1790 1791 discard_it: 1792 kfree_skb(skb); 1793 return 0; 1794 1795 discard_and_relse: 1796 sk_drops_add(sk, skb); 1797 if (refcounted) 1798 sock_put(sk); 1799 goto discard_it; 1800 1801 do_time_wait: 1802 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { 1803 inet_twsk_put(inet_twsk(sk)); 1804 goto discard_it; 1805 } 1806 1807 tcp_v6_fill_cb(skb, hdr, th); 1808 1809 if (tcp_checksum_complete(skb)) { 1810 inet_twsk_put(inet_twsk(sk)); 1811 goto csum_error; 1812 } 1813 1814 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { 1815 case TCP_TW_SYN: 1816 { 1817 struct sock *sk2; 1818 1819 sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo, 1820 skb, __tcp_hdrlen(th), 1821 &ipv6_hdr(skb)->saddr, th->source, 1822 &ipv6_hdr(skb)->daddr, 1823 ntohs(th->dest), 1824 tcp_v6_iif_l3_slave(skb), 1825 sdif); 1826 if (sk2) { 1827 struct inet_timewait_sock *tw = inet_twsk(sk); 1828 inet_twsk_deschedule_put(tw); 1829 sk = sk2; 1830 tcp_v6_restore_cb(skb); 1831 refcounted = false; 1832 goto process; 1833 } 1834 } 1835 /* to ACK */ 1836 fallthrough; 1837 case TCP_TW_ACK: 1838 tcp_v6_timewait_ack(sk, skb); 1839 break; 1840 case TCP_TW_RST: 1841 tcp_v6_send_reset(sk, skb); 1842 inet_twsk_deschedule_put(inet_twsk(sk)); 1843 goto discard_it; 1844 case TCP_TW_SUCCESS: 1845 ; 1846 } 1847 goto discard_it; 1848 } 1849 1850 INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb) 1851 { 1852 const struct ipv6hdr *hdr; 1853 const struct tcphdr *th; 1854 struct sock *sk; 1855 1856 if (skb->pkt_type != PACKET_HOST) 1857 return; 1858 1859 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr))) 1860 return; 1861 1862 hdr = ipv6_hdr(skb); 1863 th = tcp_hdr(skb); 1864 1865 if (th->doff < sizeof(struct tcphdr) / 4) 1866 return; 1867 1868 /* Note : We use inet6_iif() here, not tcp_v6_iif() */ 1869 sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo, 1870 &hdr->saddr, th->source, 1871 &hdr->daddr, ntohs(th->dest), 1872 inet6_iif(skb), inet6_sdif(skb)); 1873 if (sk) { 1874 skb->sk = sk; 1875 skb->destructor = sock_edemux; 1876 if (sk_fullsock(sk)) { 1877 struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst); 1878 1879 if (dst) 1880 dst = dst_check(dst, sk->sk_rx_dst_cookie); 1881 if (dst && 1882 sk->sk_rx_dst_ifindex == skb->skb_iif) 1883 skb_dst_set_noref(skb, dst); 1884 } 1885 } 1886 } 1887 1888 static struct timewait_sock_ops tcp6_timewait_sock_ops = { 1889 .twsk_obj_size = sizeof(struct tcp6_timewait_sock), 1890 .twsk_unique = tcp_twsk_unique, 1891 .twsk_destructor = tcp_twsk_destructor, 1892 }; 1893 1894 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb) 1895 { 1896 struct ipv6_pinfo *np = inet6_sk(sk); 1897 1898 __tcp_v6_send_check(skb, &np->saddr, &sk->sk_v6_daddr); 1899 } 1900 1901 const struct inet_connection_sock_af_ops ipv6_specific = { 1902 .queue_xmit = inet6_csk_xmit, 1903 .send_check = tcp_v6_send_check, 1904 .rebuild_header = inet6_sk_rebuild_header, 1905 .sk_rx_dst_set = inet6_sk_rx_dst_set, 1906 .conn_request = tcp_v6_conn_request, 1907 .syn_recv_sock = tcp_v6_syn_recv_sock, 1908 .net_header_len = sizeof(struct 
ipv6hdr), 1909 .net_frag_header_len = sizeof(struct frag_hdr), 1910 .setsockopt = ipv6_setsockopt, 1911 .getsockopt = ipv6_getsockopt, 1912 .addr2sockaddr = inet6_csk_addr2sockaddr, 1913 .sockaddr_len = sizeof(struct sockaddr_in6), 1914 .mtu_reduced = tcp_v6_mtu_reduced, 1915 }; 1916 1917 #ifdef CONFIG_TCP_MD5SIG 1918 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = { 1919 .md5_lookup = tcp_v6_md5_lookup, 1920 .calc_md5_hash = tcp_v6_md5_hash_skb, 1921 .md5_parse = tcp_v6_parse_md5_keys, 1922 }; 1923 #endif 1924 1925 /* 1926 * TCP over IPv4 via INET6 API 1927 */ 1928 static const struct inet_connection_sock_af_ops ipv6_mapped = { 1929 .queue_xmit = ip_queue_xmit, 1930 .send_check = tcp_v4_send_check, 1931 .rebuild_header = inet_sk_rebuild_header, 1932 .sk_rx_dst_set = inet_sk_rx_dst_set, 1933 .conn_request = tcp_v6_conn_request, 1934 .syn_recv_sock = tcp_v6_syn_recv_sock, 1935 .net_header_len = sizeof(struct iphdr), 1936 .setsockopt = ipv6_setsockopt, 1937 .getsockopt = ipv6_getsockopt, 1938 .addr2sockaddr = inet6_csk_addr2sockaddr, 1939 .sockaddr_len = sizeof(struct sockaddr_in6), 1940 .mtu_reduced = tcp_v4_mtu_reduced, 1941 }; 1942 1943 #ifdef CONFIG_TCP_MD5SIG 1944 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = { 1945 .md5_lookup = tcp_v4_md5_lookup, 1946 .calc_md5_hash = tcp_v4_md5_hash_skb, 1947 .md5_parse = tcp_v6_parse_md5_keys, 1948 }; 1949 #endif 1950 1951 /* NOTE: A lot of things set to zero explicitly by call to 1952 * sk_alloc() so need not be done here. 1953 */ 1954 static int tcp_v6_init_sock(struct sock *sk) 1955 { 1956 struct inet_connection_sock *icsk = inet_csk(sk); 1957 1958 tcp_init_sock(sk); 1959 1960 icsk->icsk_af_ops = &ipv6_specific; 1961 1962 #ifdef CONFIG_TCP_MD5SIG 1963 tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific; 1964 #endif 1965 1966 return 0; 1967 } 1968 1969 static void tcp_v6_destroy_sock(struct sock *sk) 1970 { 1971 tcp_v4_destroy_sock(sk); 1972 inet6_destroy_sock(sk); 1973 } 1974 1975 #ifdef CONFIG_PROC_FS 1976 /* Proc filesystem TCPv6 sock list dumping. */ 1977 static void get_openreq6(struct seq_file *seq, 1978 const struct request_sock *req, int i) 1979 { 1980 long ttd = req->rsk_timer.expires - jiffies; 1981 const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr; 1982 const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr; 1983 1984 if (ttd < 0) 1985 ttd = 0; 1986 1987 seq_printf(seq, 1988 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " 1989 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n", 1990 i, 1991 src->s6_addr32[0], src->s6_addr32[1], 1992 src->s6_addr32[2], src->s6_addr32[3], 1993 inet_rsk(req)->ir_num, 1994 dest->s6_addr32[0], dest->s6_addr32[1], 1995 dest->s6_addr32[2], dest->s6_addr32[3], 1996 ntohs(inet_rsk(req)->ir_rmt_port), 1997 TCP_SYN_RECV, 1998 0, 0, /* could print option size, but that is af dependent. 
#ifdef CONFIG_PROC_FS
/* Proc filesystem TCPv6 sock list dumping. */
static void get_openreq6(struct seq_file *seq,
			 const struct request_sock *req, int i)
{
	long ttd = req->rsk_timer.expires - jiffies;
	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;

	if (ttd < 0)
		ttd = 0;

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3],
		   inet_rsk(req)->ir_num,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3],
		   ntohs(inet_rsk(req)->ir_rmt_port),
		   TCP_SYN_RECV,
		   0, 0, /* could print option size, but that is af dependent. */
		   1,   /* timers active (only the expire timer) */
		   jiffies_to_clock_t(ttd),
		   req->num_timeout,
		   from_kuid_munged(seq_user_ns(seq),
				    sock_i_uid(req->rsk_listener)),
		   0,  /* non standard timer */
		   0, /* open_requests have no inode */
		   0, req);
}

static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int timer_active;
	unsigned long timer_expires;
	const struct inet_sock *inet = inet_sk(sp);
	const struct tcp_sock *tp = tcp_sk(sp);
	const struct inet_connection_sock *icsk = inet_csk(sp);
	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
	int rx_queue;
	int state;

	dest  = &sp->sk_v6_daddr;
	src   = &sp->sk_v6_rcv_saddr;
	destp = ntohs(inet->inet_dport);
	srcp  = ntohs(inet->inet_sport);

	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
		timer_active	= 1;
		timer_expires	= icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= icsk->icsk_timeout;
	} else if (timer_pending(&sp->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sp->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires = jiffies;
	}

	state = inet_sk_state_load(sp);
	if (state == TCP_LISTEN)
		rx_queue = READ_ONCE(sp->sk_ack_backlog);
	else
		/* Because we don't lock the socket,
		 * we might find a transient negative value.
		 */
		rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
				      READ_ONCE(tp->copied_seq), 0);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   state,
		   READ_ONCE(tp->write_seq) - tp->snd_una,
		   rx_queue,
		   timer_active,
		   jiffies_delta_to_clock_t(timer_expires - jiffies),
		   icsk->icsk_retransmits,
		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
		   icsk->icsk_probes_out,
		   sock_i_ino(sp),
		   refcount_read(&sp->sk_refcnt), sp,
		   jiffies_to_clock_t(icsk->icsk_rto),
		   jiffies_to_clock_t(icsk->icsk_ack.ato),
		   (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
		   tp->snd_cwnd,
		   state == TCP_LISTEN ?
			fastopenq->max_qlen :
			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
		   );
}
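/* A connection in TIME_WAIT is represented only by a struct
 * inet_timewait_sock, so most of the per-connection counters below are
 * printed as zero; the state column carries tw_substate and the tr/tm->when
 * columns report timer code 3 with the remaining timewait interval.
 */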
static void get_timewait6_sock(struct seq_file *seq,
			       struct inet_timewait_sock *tw, int i)
{
	long delta = tw->tw_timer.expires - jiffies;
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;

	dest = &tw->tw_v6_daddr;
	src  = &tw->tw_v6_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   tw->tw_substate, 0, 0,
		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
		   refcount_read(&tw->tw_refcnt), tw);
}

static int tcp6_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
	struct sock *sk = v;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "  sl  "
			 "local_address                         "
			 "remote_address                        "
			 "st tx_queue rx_queue tr tm->when retrnsmt"
			 "   uid  timeout inode\n");
		goto out;
	}
	st = seq->private;

	if (sk->sk_state == TCP_TIME_WAIT)
		get_timewait6_sock(seq, v, st->num);
	else if (sk->sk_state == TCP_NEW_SYN_RECV)
		get_openreq6(seq, v, st->num);
	else
		get_tcp6_sock(seq, v, st->num);
out:
	return 0;
}

static const struct seq_operations tcp6_seq_ops = {
	.show		= tcp6_seq_show,
	.start		= tcp_seq_start,
	.next		= tcp_seq_next,
	.stop		= tcp_seq_stop,
};

static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.family		= AF_INET6,
};

int __net_init tcp6_proc_init(struct net *net)
{
	if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
			sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
		return -ENOMEM;
	return 0;
}

void tcp6_proc_exit(struct net *net)
{
	remove_proc_entry("tcp6", net->proc_net);
}
#endif
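/* Protocol method table for IPv6 TCP sockets.  It is hooked into the socket
 * layer through tcpv6_protosw below (SOCK_STREAM/IPPROTO_TCP on PF_INET6)
 * and reuses the IPv4 TCP implementation for everything that is not
 * address-family specific.
 */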
struct proto tcpv6_prot = {
	.name			= "TCPv6",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.pre_connect		= tcp_v6_pre_connect,
	.connect		= tcp_v6_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v6_init_sock,
	.destroy		= tcp_v6_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.bpf_bypass_getsockopt	= tcp_bpf_bypass_getsockopt,
	.keepalive		= tcp_set_keepalive,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.sendpage		= tcp_sendpage,
	.backlog_rcv		= tcp_v6_do_rcv,
	.release_cb		= tcp_release_cb,
	.hash			= inet6_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
#ifdef CONFIG_BPF_SYSCALL
	.psock_update_sk_prot	= tcp_bpf_update_proto,
#endif
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.leave_memory_pressure	= tcp_leave_memory_pressure,
	.stream_memory_free	= tcp_stream_memory_free,
	.sockets_allocated	= &tcp_sockets_allocated,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.orphan_count		= &tcp_orphan_count,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp6_sock),
	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
	.twsk_prot		= &tcp6_timewait_sock_ops,
	.rsk_prot		= &tcp6_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
	.no_autobind		= true,
	.diag_destroy		= tcp_abort,
};
EXPORT_SYMBOL_GPL(tcpv6_prot);

/* thinking of making this const? Don't.
 * early_demux can change based on sysctl.
 */
static struct inet6_protocol tcpv6_protocol = {
	.early_demux		= tcp_v6_early_demux,
	.early_demux_handler	= tcp_v6_early_demux,
	.handler		= tcp_v6_rcv,
	.err_handler		= tcp_v6_err,
	.flags			= INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};

static struct inet_protosw tcpv6_protosw = {
	.type		=	SOCK_STREAM,
	.protocol	=	IPPROTO_TCP,
	.prot		=	&tcpv6_prot,
	.ops		=	&inet6_stream_ops,
	.flags		=	INET_PROTOSW_PERMANENT |
				INET_PROTOSW_ICSK,
};

static int __net_init tcpv6_net_init(struct net *net)
{
	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
				    SOCK_RAW, IPPROTO_TCP, net);
}

static void __net_exit tcpv6_net_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
}

static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
{
	inet_twsk_purge(&tcp_hashinfo, AF_INET6);
}

static struct pernet_operations tcpv6_net_ops = {
	.init	    = tcpv6_net_init,
	.exit	    = tcpv6_net_exit,
	.exit_batch = tcpv6_net_exit_batch,
};

int __init tcpv6_init(void)
{
	int ret;

	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
	if (ret)
		goto out;

	/* register inet6 protocol */
	ret = inet6_register_protosw(&tcpv6_protosw);
	if (ret)
		goto out_tcpv6_protocol;

	ret = register_pernet_subsys(&tcpv6_net_ops);
	if (ret)
		goto out_tcpv6_protosw;

	ret = mptcpv6_init();
	if (ret)
		goto out_tcpv6_pernet_subsys;

out:
	return ret;

out_tcpv6_pernet_subsys:
	unregister_pernet_subsys(&tcpv6_net_ops);
out_tcpv6_protosw:
	inet6_unregister_protosw(&tcpv6_protosw);
out_tcpv6_protocol:
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
	goto out;
}

void tcpv6_exit(void)
{
	unregister_pernet_subsys(&tcpv6_net_ops);
	inet6_unregister_protosw(&tcpv6_protosw);
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
}