// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	TCP over IPv6
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on:
 *	linux/net/ipv4/tcp.c
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allows both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 */

#include <linux/bottom_half.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
#include <linux/indirect_call_wrapper.h>

#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/inet6_connection_sock.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/snmp.h>
#include <net/dsfield.h>
#include <net/timewait_sock.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>
#include <net/busy_poll.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <crypto/hash.h>
#include <linux/scatterlist.h>

#include <trace/events/tcp.h>

static void	tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
static void	tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				      struct request_sock *req);

INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);

static const struct inet_connection_sock_af_ops ipv6_mapped;
const struct inet_connection_sock_af_ops ipv6_specific;
#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
#else
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
						   const struct in6_addr *addr,
						   int l3index)
{
	return NULL;
}
#endif

/* Helper returning the inet6 address from a given tcp socket.
 * It can be used in TCP stack instead of inet6_sk(sk).
 * This avoids a dereference and allows compiler optimizations.
 * It is a specialized version of inet6_sk_generic().
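 * The constant offset below relies on struct ipv6_pinfo being the last
 * member of struct tcp6_sock, making the expression equivalent to taking
 * the address of that trailing member while letting the offset be folded
 * at compile time.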
95 */ 96 static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk) 97 { 98 unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo); 99 100 return (struct ipv6_pinfo *)(((u8 *)sk) + offset); 101 } 102 103 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) 104 { 105 struct dst_entry *dst = skb_dst(skb); 106 107 if (dst && dst_hold_safe(dst)) { 108 const struct rt6_info *rt = (const struct rt6_info *)dst; 109 110 rcu_assign_pointer(sk->sk_rx_dst, dst); 111 sk->sk_rx_dst_ifindex = skb->skb_iif; 112 sk->sk_rx_dst_cookie = rt6_get_cookie(rt); 113 } 114 } 115 116 static u32 tcp_v6_init_seq(const struct sk_buff *skb) 117 { 118 return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32, 119 ipv6_hdr(skb)->saddr.s6_addr32, 120 tcp_hdr(skb)->dest, 121 tcp_hdr(skb)->source); 122 } 123 124 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb) 125 { 126 return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32, 127 ipv6_hdr(skb)->saddr.s6_addr32); 128 } 129 130 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr, 131 int addr_len) 132 { 133 /* This check is replicated from tcp_v6_connect() and intended to 134 * prevent BPF program called below from accessing bytes that are out 135 * of the bound specified by user in addr_len. 136 */ 137 if (addr_len < SIN6_LEN_RFC2133) 138 return -EINVAL; 139 140 sock_owned_by_me(sk); 141 142 return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr); 143 } 144 145 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, 146 int addr_len) 147 { 148 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; 149 struct inet_sock *inet = inet_sk(sk); 150 struct inet_connection_sock *icsk = inet_csk(sk); 151 struct inet_timewait_death_row *tcp_death_row; 152 struct ipv6_pinfo *np = tcp_inet6_sk(sk); 153 struct tcp_sock *tp = tcp_sk(sk); 154 struct in6_addr *saddr = NULL, *final_p, final; 155 struct ipv6_txoptions *opt; 156 struct flowi6 fl6; 157 struct dst_entry *dst; 158 int addr_type; 159 int err; 160 161 if (addr_len < SIN6_LEN_RFC2133) 162 return -EINVAL; 163 164 if (usin->sin6_family != AF_INET6) 165 return -EAFNOSUPPORT; 166 167 memset(&fl6, 0, sizeof(fl6)); 168 169 if (np->sndflow) { 170 fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK; 171 IP6_ECN_flow_init(fl6.flowlabel); 172 if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { 173 struct ip6_flowlabel *flowlabel; 174 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); 175 if (IS_ERR(flowlabel)) 176 return -EINVAL; 177 fl6_sock_release(flowlabel); 178 } 179 } 180 181 /* 182 * connect() to INADDR_ANY means loopback (BSD'ism). 183 */ 184 185 if (ipv6_addr_any(&usin->sin6_addr)) { 186 if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) 187 ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK), 188 &usin->sin6_addr); 189 else 190 usin->sin6_addr = in6addr_loopback; 191 } 192 193 addr_type = ipv6_addr_type(&usin->sin6_addr); 194 195 if (addr_type & IPV6_ADDR_MULTICAST) 196 return -ENETUNREACH; 197 198 if (addr_type&IPV6_ADDR_LINKLOCAL) { 199 if (addr_len >= sizeof(struct sockaddr_in6) && 200 usin->sin6_scope_id) { 201 /* If interface is set while binding, indices 202 * must coincide. 
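			 * (A socket bound to an L3 master device also matches
			 * a scope id naming one of that master's slave
			 * interfaces; see sk_dev_equal_l3scope().)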
203 */ 204 if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id)) 205 return -EINVAL; 206 207 sk->sk_bound_dev_if = usin->sin6_scope_id; 208 } 209 210 /* Connect to link-local address requires an interface */ 211 if (!sk->sk_bound_dev_if) 212 return -EINVAL; 213 } 214 215 if (tp->rx_opt.ts_recent_stamp && 216 !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) { 217 tp->rx_opt.ts_recent = 0; 218 tp->rx_opt.ts_recent_stamp = 0; 219 WRITE_ONCE(tp->write_seq, 0); 220 } 221 222 sk->sk_v6_daddr = usin->sin6_addr; 223 np->flow_label = fl6.flowlabel; 224 225 /* 226 * TCP over IPv4 227 */ 228 229 if (addr_type & IPV6_ADDR_MAPPED) { 230 u32 exthdrlen = icsk->icsk_ext_hdr_len; 231 struct sockaddr_in sin; 232 233 if (ipv6_only_sock(sk)) 234 return -ENETUNREACH; 235 236 sin.sin_family = AF_INET; 237 sin.sin_port = usin->sin6_port; 238 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3]; 239 240 icsk->icsk_af_ops = &ipv6_mapped; 241 if (sk_is_mptcp(sk)) 242 mptcpv6_handle_mapped(sk, true); 243 sk->sk_backlog_rcv = tcp_v4_do_rcv; 244 #ifdef CONFIG_TCP_MD5SIG 245 tp->af_specific = &tcp_sock_ipv6_mapped_specific; 246 #endif 247 248 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin)); 249 250 if (err) { 251 icsk->icsk_ext_hdr_len = exthdrlen; 252 icsk->icsk_af_ops = &ipv6_specific; 253 if (sk_is_mptcp(sk)) 254 mptcpv6_handle_mapped(sk, false); 255 sk->sk_backlog_rcv = tcp_v6_do_rcv; 256 #ifdef CONFIG_TCP_MD5SIG 257 tp->af_specific = &tcp_sock_ipv6_specific; 258 #endif 259 goto failure; 260 } 261 np->saddr = sk->sk_v6_rcv_saddr; 262 263 return err; 264 } 265 266 if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) 267 saddr = &sk->sk_v6_rcv_saddr; 268 269 fl6.flowi6_proto = IPPROTO_TCP; 270 fl6.daddr = sk->sk_v6_daddr; 271 fl6.saddr = saddr ? *saddr : np->saddr; 272 fl6.flowi6_oif = sk->sk_bound_dev_if; 273 fl6.flowi6_mark = sk->sk_mark; 274 fl6.fl6_dport = usin->sin6_port; 275 fl6.fl6_sport = inet->inet_sport; 276 fl6.flowi6_uid = sk->sk_uid; 277 278 opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk)); 279 final_p = fl6_update_dst(&fl6, opt, &final); 280 281 security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6)); 282 283 dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p); 284 if (IS_ERR(dst)) { 285 err = PTR_ERR(dst); 286 goto failure; 287 } 288 289 if (!saddr) { 290 saddr = &fl6.saddr; 291 sk->sk_v6_rcv_saddr = *saddr; 292 } 293 294 /* set the source address */ 295 np->saddr = *saddr; 296 inet->inet_rcv_saddr = LOOPBACK4_IPV6; 297 298 sk->sk_gso_type = SKB_GSO_TCPV6; 299 ip6_dst_store(sk, dst, NULL, NULL); 300 301 icsk->icsk_ext_hdr_len = 0; 302 if (opt) 303 icsk->icsk_ext_hdr_len = opt->opt_flen + 304 opt->opt_nflen; 305 306 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); 307 308 inet->inet_dport = usin->sin6_port; 309 310 tcp_set_state(sk, TCP_SYN_SENT); 311 tcp_death_row = sock_net(sk)->ipv4.tcp_death_row; 312 err = inet6_hash_connect(tcp_death_row, sk); 313 if (err) 314 goto late_failure; 315 316 sk_set_txhash(sk); 317 318 if (likely(!tp->repair)) { 319 if (!tp->write_seq) 320 WRITE_ONCE(tp->write_seq, 321 secure_tcpv6_seq(np->saddr.s6_addr32, 322 sk->sk_v6_daddr.s6_addr32, 323 inet->inet_sport, 324 inet->inet_dport)); 325 tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk), 326 np->saddr.s6_addr32, 327 sk->sk_v6_daddr.s6_addr32); 328 } 329 330 if (tcp_fastopen_defer_connect(sk, &err)) 331 return err; 332 if (err) 333 goto late_failure; 334 335 err = tcp_connect(sk); 336 if (err) 337 goto late_failure; 338 339 return 0; 340 341 late_failure: 
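	/* Reached only after the socket was already moved to TCP_SYN_SENT,
	 * so drop it back to TCP_CLOSE before the shared failure cleanup.
	 */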
	tcp_set_state(sk, TCP_CLOSE);
failure:
	inet->inet_dport = 0;
	sk->sk_route_caps = 0;
	return err;
}

static void tcp_v6_mtu_reduced(struct sock *sk)
{
	struct dst_entry *dst;
	u32 mtu;

	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
		return;

	mtu = READ_ONCE(tcp_sk(sk)->mtu_info);

	/* Drop requests trying to increase our current mss.
	 * Check done in __ip6_rt_update_pmtu() is too late.
	 */
	if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
		return;

	dst = inet6_csk_update_pmtu(sk, mtu);
	if (!dst)
		return;

	if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
		tcp_sync_mss(sk, dst_mtu(dst));
		tcp_simple_retransmit(sk);
	}
}

static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		      u8 type, u8 code, int offset, __be32 info)
{
	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
	const struct tcphdr *th = (struct tcphdr *)(skb->data + offset);
	struct net *net = dev_net(skb->dev);
	struct request_sock *fastopen;
	struct ipv6_pinfo *np;
	struct tcp_sock *tp;
	__u32 seq, snd_una;
	struct sock *sk;
	bool fatal;
	int err;

	sk = __inet6_lookup_established(net, &tcp_hashinfo,
					&hdr->daddr, th->dest,
					&hdr->saddr, ntohs(th->source),
					skb->dev->ifindex, inet6_sdif(skb));

	if (!sk) {
		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
				  ICMP6_MIB_INERRORS);
		return -ENOENT;
	}

	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return 0;
	}
	seq = ntohl(th->seq);
	fatal = icmpv6_err_convert(type, code, &err);
	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		tcp_req_err(sk, seq, fatal);
		return 0;
	}

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (static_branch_unlikely(&ip6_min_hopcount)) {
		/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
		if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
			goto out;
		}
	}

	tp = tcp_sk(sk);
	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
	fastopen = rcu_dereference(tp->fastopen_rsk);
	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, snd_una, tp->snd_nxt)) {
		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = tcp_inet6_sk(sk);

	if (type == NDISC_REDIRECT) {
		if (!sock_owned_by_user(sk)) {
			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);

			if (dst)
				dst->ops->redirect(dst, sk, skb);
		}
		goto out;
	}

	if (type == ICMPV6_PKT_TOOBIG) {
		u32 mtu = ntohl(info);

		/* We are not interested in TCP_LISTEN and open_requests
		 * (SYN-ACKs sent out by Linux are always < 576 bytes, so
		 * they should go through unfragmented).
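		 * PMTU reports below IPV6_MIN_MTU (1280 bytes) are ignored
		 * further down as well, since every IPv6 link is required to
		 * support at least that MTU.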
		 */
		if (sk->sk_state == TCP_LISTEN)
			goto out;

		if (!ip6_sk_accept_pmtu(sk))
			goto out;

		if (mtu < IPV6_MIN_MTU)
			goto out;

		WRITE_ONCE(tp->mtu_info, mtu);

		if (!sock_owned_by_user(sk))
			tcp_v6_mtu_reduced(sk);
		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
					   &sk->sk_tsq_flags))
			sock_hold(sk);
		goto out;
	}


	/* Might be for a request_sock */
	switch (sk->sk_state) {
	case TCP_SYN_SENT:
	case TCP_SYN_RECV:
		/* Only in fast or simultaneous open. If a fast open socket is
		 * already accepted it is treated as a connected one below.
		 */
		if (fastopen && !fastopen->sk)
			break;

		ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);

		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;
			sk_error_report(sk);	/* Wake people up to see the error (see connect in sock.c) */

			tcp_done(sk);
		} else
			sk->sk_err_soft = err;
		goto out;
	case TCP_LISTEN:
		break;
	default:
		/* check if this ICMP message allows revert of backoff.
		 * (see RFC 6069)
		 */
		if (!fastopen && type == ICMPV6_DEST_UNREACH &&
		    code == ICMPV6_NOROUTE)
			tcp_ld_RTO_revert(sk, seq);
	}

	if (!sock_owned_by_user(sk) && np->recverr) {
		sk->sk_err = err;
		sk_error_report(sk);
	} else
		sk->sk_err_soft = err;

out:
	bh_unlock_sock(sk);
	sock_put(sk);
	return 0;
}


static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
			      struct flowi *fl,
			      struct request_sock *req,
			      struct tcp_fastopen_cookie *foc,
			      enum tcp_synack_type synack_type,
			      struct sk_buff *syn_skb)
{
	struct inet_request_sock *ireq = inet_rsk(req);
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct ipv6_txoptions *opt;
	struct flowi6 *fl6 = &fl->u.ip6;
	struct sk_buff *skb;
	int err = -ENOMEM;
	u8 tclass;

	/* First, grab a route. */
	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
					       IPPROTO_TCP)) == NULL)
		goto done;

	skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);

	if (skb) {
		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
				    &ireq->ir_v6_rmt_addr);

		fl6->daddr = ireq->ir_v6_rmt_addr;
		if (np->repflow && ireq->pktopts)
			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));

		tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
				(tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
				(np->tclass & INET_ECN_MASK) :
				np->tclass;

		if (!INET_ECN_is_capable(tclass) &&
		    tcp_bpf_ca_needs_ecn((struct sock *)req))
			tclass |= INET_ECN_ECT_0;

		rcu_read_lock();
		opt = ireq->ipv6_opt;
		if (!opt)
			opt = rcu_dereference(np->opt);
		err = ip6_xmit(sk, skb, fl6, skb->mark ?
: sk->sk_mark, opt, 563 tclass, sk->sk_priority); 564 rcu_read_unlock(); 565 err = net_xmit_eval(err); 566 } 567 568 done: 569 return err; 570 } 571 572 573 static void tcp_v6_reqsk_destructor(struct request_sock *req) 574 { 575 kfree(inet_rsk(req)->ipv6_opt); 576 consume_skb(inet_rsk(req)->pktopts); 577 } 578 579 #ifdef CONFIG_TCP_MD5SIG 580 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk, 581 const struct in6_addr *addr, 582 int l3index) 583 { 584 return tcp_md5_do_lookup(sk, l3index, 585 (union tcp_md5_addr *)addr, AF_INET6); 586 } 587 588 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk, 589 const struct sock *addr_sk) 590 { 591 int l3index; 592 593 l3index = l3mdev_master_ifindex_by_index(sock_net(sk), 594 addr_sk->sk_bound_dev_if); 595 return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr, 596 l3index); 597 } 598 599 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname, 600 sockptr_t optval, int optlen) 601 { 602 struct tcp_md5sig cmd; 603 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr; 604 int l3index = 0; 605 u8 prefixlen; 606 u8 flags; 607 608 if (optlen < sizeof(cmd)) 609 return -EINVAL; 610 611 if (copy_from_sockptr(&cmd, optval, sizeof(cmd))) 612 return -EFAULT; 613 614 if (sin6->sin6_family != AF_INET6) 615 return -EINVAL; 616 617 flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX; 618 619 if (optname == TCP_MD5SIG_EXT && 620 cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) { 621 prefixlen = cmd.tcpm_prefixlen; 622 if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) && 623 prefixlen > 32)) 624 return -EINVAL; 625 } else { 626 prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128; 627 } 628 629 if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex && 630 cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) { 631 struct net_device *dev; 632 633 rcu_read_lock(); 634 dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex); 635 if (dev && netif_is_l3_master(dev)) 636 l3index = dev->ifindex; 637 rcu_read_unlock(); 638 639 /* ok to reference set/not set outside of rcu; 640 * right now device MUST be an L3 master 641 */ 642 if (!dev || !l3index) 643 return -EINVAL; 644 } 645 646 if (!cmd.tcpm_keylen) { 647 if (ipv6_addr_v4mapped(&sin6->sin6_addr)) 648 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3], 649 AF_INET, prefixlen, 650 l3index, flags); 651 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr, 652 AF_INET6, prefixlen, l3index, flags); 653 } 654 655 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN) 656 return -EINVAL; 657 658 if (ipv6_addr_v4mapped(&sin6->sin6_addr)) 659 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3], 660 AF_INET, prefixlen, l3index, flags, 661 cmd.tcpm_key, cmd.tcpm_keylen, 662 GFP_KERNEL); 663 664 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr, 665 AF_INET6, prefixlen, l3index, flags, 666 cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL); 667 } 668 669 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp, 670 const struct in6_addr *daddr, 671 const struct in6_addr *saddr, 672 const struct tcphdr *th, int nbytes) 673 { 674 struct tcp6_pseudohdr *bp; 675 struct scatterlist sg; 676 struct tcphdr *_th; 677 678 bp = hp->scratch; 679 /* 1. 
TCP pseudo-header (RFC2460) */ 680 bp->saddr = *saddr; 681 bp->daddr = *daddr; 682 bp->protocol = cpu_to_be32(IPPROTO_TCP); 683 bp->len = cpu_to_be32(nbytes); 684 685 _th = (struct tcphdr *)(bp + 1); 686 memcpy(_th, th, sizeof(*th)); 687 _th->check = 0; 688 689 sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th)); 690 ahash_request_set_crypt(hp->md5_req, &sg, NULL, 691 sizeof(*bp) + sizeof(*th)); 692 return crypto_ahash_update(hp->md5_req); 693 } 694 695 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, 696 const struct in6_addr *daddr, struct in6_addr *saddr, 697 const struct tcphdr *th) 698 { 699 struct tcp_md5sig_pool *hp; 700 struct ahash_request *req; 701 702 hp = tcp_get_md5sig_pool(); 703 if (!hp) 704 goto clear_hash_noput; 705 req = hp->md5_req; 706 707 if (crypto_ahash_init(req)) 708 goto clear_hash; 709 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2)) 710 goto clear_hash; 711 if (tcp_md5_hash_key(hp, key)) 712 goto clear_hash; 713 ahash_request_set_crypt(req, NULL, md5_hash, 0); 714 if (crypto_ahash_final(req)) 715 goto clear_hash; 716 717 tcp_put_md5sig_pool(); 718 return 0; 719 720 clear_hash: 721 tcp_put_md5sig_pool(); 722 clear_hash_noput: 723 memset(md5_hash, 0, 16); 724 return 1; 725 } 726 727 static int tcp_v6_md5_hash_skb(char *md5_hash, 728 const struct tcp_md5sig_key *key, 729 const struct sock *sk, 730 const struct sk_buff *skb) 731 { 732 const struct in6_addr *saddr, *daddr; 733 struct tcp_md5sig_pool *hp; 734 struct ahash_request *req; 735 const struct tcphdr *th = tcp_hdr(skb); 736 737 if (sk) { /* valid for establish/request sockets */ 738 saddr = &sk->sk_v6_rcv_saddr; 739 daddr = &sk->sk_v6_daddr; 740 } else { 741 const struct ipv6hdr *ip6h = ipv6_hdr(skb); 742 saddr = &ip6h->saddr; 743 daddr = &ip6h->daddr; 744 } 745 746 hp = tcp_get_md5sig_pool(); 747 if (!hp) 748 goto clear_hash_noput; 749 req = hp->md5_req; 750 751 if (crypto_ahash_init(req)) 752 goto clear_hash; 753 754 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len)) 755 goto clear_hash; 756 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2)) 757 goto clear_hash; 758 if (tcp_md5_hash_key(hp, key)) 759 goto clear_hash; 760 ahash_request_set_crypt(req, NULL, md5_hash, 0); 761 if (crypto_ahash_final(req)) 762 goto clear_hash; 763 764 tcp_put_md5sig_pool(); 765 return 0; 766 767 clear_hash: 768 tcp_put_md5sig_pool(); 769 clear_hash_noput: 770 memset(md5_hash, 0, 16); 771 return 1; 772 } 773 774 #endif 775 776 static void tcp_v6_init_req(struct request_sock *req, 777 const struct sock *sk_listener, 778 struct sk_buff *skb) 779 { 780 bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags); 781 struct inet_request_sock *ireq = inet_rsk(req); 782 const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener); 783 784 ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; 785 ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; 786 787 /* So that link locals have meaning */ 788 if ((!sk_listener->sk_bound_dev_if || l3_slave) && 789 ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) 790 ireq->ir_iif = tcp_v6_iif(skb); 791 792 if (!TCP_SKB_CB(skb)->tcp_tw_isn && 793 (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) || 794 np->rxopt.bits.rxinfo || 795 np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || 796 np->rxopt.bits.rxohlim || np->repflow)) { 797 refcount_inc(&skb->users); 798 ireq->pktopts = skb; 799 } 800 } 801 802 static struct dst_entry *tcp_v6_route_req(const struct sock *sk, 803 struct sk_buff *skb, 804 struct flowi *fl, 805 struct request_sock 
*req) 806 { 807 tcp_v6_init_req(req, sk, skb); 808 809 if (security_inet_conn_request(sk, skb, req)) 810 return NULL; 811 812 return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP); 813 } 814 815 struct request_sock_ops tcp6_request_sock_ops __read_mostly = { 816 .family = AF_INET6, 817 .obj_size = sizeof(struct tcp6_request_sock), 818 .rtx_syn_ack = tcp_rtx_synack, 819 .send_ack = tcp_v6_reqsk_send_ack, 820 .destructor = tcp_v6_reqsk_destructor, 821 .send_reset = tcp_v6_send_reset, 822 .syn_ack_timeout = tcp_syn_ack_timeout, 823 }; 824 825 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { 826 .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - 827 sizeof(struct ipv6hdr), 828 #ifdef CONFIG_TCP_MD5SIG 829 .req_md5_lookup = tcp_v6_md5_lookup, 830 .calc_md5_hash = tcp_v6_md5_hash_skb, 831 #endif 832 #ifdef CONFIG_SYN_COOKIES 833 .cookie_init_seq = cookie_v6_init_sequence, 834 #endif 835 .route_req = tcp_v6_route_req, 836 .init_seq = tcp_v6_init_seq, 837 .init_ts_off = tcp_v6_init_ts_off, 838 .send_synack = tcp_v6_send_synack, 839 }; 840 841 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq, 842 u32 ack, u32 win, u32 tsval, u32 tsecr, 843 int oif, struct tcp_md5sig_key *key, int rst, 844 u8 tclass, __be32 label, u32 priority) 845 { 846 const struct tcphdr *th = tcp_hdr(skb); 847 struct tcphdr *t1; 848 struct sk_buff *buff; 849 struct flowi6 fl6; 850 struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev); 851 struct sock *ctl_sk = net->ipv6.tcp_sk; 852 unsigned int tot_len = sizeof(struct tcphdr); 853 __be32 mrst = 0, *topt; 854 struct dst_entry *dst; 855 __u32 mark = 0; 856 857 if (tsecr) 858 tot_len += TCPOLEN_TSTAMP_ALIGNED; 859 #ifdef CONFIG_TCP_MD5SIG 860 if (key) 861 tot_len += TCPOLEN_MD5SIG_ALIGNED; 862 #endif 863 864 #ifdef CONFIG_MPTCP 865 if (rst && !key) { 866 mrst = mptcp_reset_option(skb); 867 868 if (mrst) 869 tot_len += sizeof(__be32); 870 } 871 #endif 872 873 buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC); 874 if (!buff) 875 return; 876 877 skb_reserve(buff, MAX_TCP_HEADER); 878 879 t1 = skb_push(buff, tot_len); 880 skb_reset_transport_header(buff); 881 882 /* Swap the send and the receive. 
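	 * The reply goes back to the sender of the segment being answered,
	 * so ports and addresses are simply mirrored from its headers.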
*/ 883 memset(t1, 0, sizeof(*t1)); 884 t1->dest = th->source; 885 t1->source = th->dest; 886 t1->doff = tot_len / 4; 887 t1->seq = htonl(seq); 888 t1->ack_seq = htonl(ack); 889 t1->ack = !rst || !th->ack; 890 t1->rst = rst; 891 t1->window = htons(win); 892 893 topt = (__be32 *)(t1 + 1); 894 895 if (tsecr) { 896 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | 897 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); 898 *topt++ = htonl(tsval); 899 *topt++ = htonl(tsecr); 900 } 901 902 if (mrst) 903 *topt++ = mrst; 904 905 #ifdef CONFIG_TCP_MD5SIG 906 if (key) { 907 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | 908 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG); 909 tcp_v6_md5_hash_hdr((__u8 *)topt, key, 910 &ipv6_hdr(skb)->saddr, 911 &ipv6_hdr(skb)->daddr, t1); 912 } 913 #endif 914 915 memset(&fl6, 0, sizeof(fl6)); 916 fl6.daddr = ipv6_hdr(skb)->saddr; 917 fl6.saddr = ipv6_hdr(skb)->daddr; 918 fl6.flowlabel = label; 919 920 buff->ip_summed = CHECKSUM_PARTIAL; 921 922 __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr); 923 924 fl6.flowi6_proto = IPPROTO_TCP; 925 if (rt6_need_strict(&fl6.daddr) && !oif) 926 fl6.flowi6_oif = tcp_v6_iif(skb); 927 else { 928 if (!oif && netif_index_is_l3_master(net, skb->skb_iif)) 929 oif = skb->skb_iif; 930 931 fl6.flowi6_oif = oif; 932 } 933 934 if (sk) { 935 if (sk->sk_state == TCP_TIME_WAIT) { 936 mark = inet_twsk(sk)->tw_mark; 937 /* autoflowlabel relies on buff->hash */ 938 skb_set_hash(buff, inet_twsk(sk)->tw_txhash, 939 PKT_HASH_TYPE_L4); 940 } else { 941 mark = sk->sk_mark; 942 } 943 skb_set_delivery_time(buff, tcp_transmit_time(sk), true); 944 } 945 fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark; 946 fl6.fl6_dport = t1->dest; 947 fl6.fl6_sport = t1->source; 948 fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL); 949 security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6)); 950 951 /* Pass a socket to ip6_dst_lookup either it is for RST 952 * Underlying function will use this to retrieve the network 953 * namespace 954 */ 955 if (sk && sk->sk_state != TCP_TIME_WAIT) 956 dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /*sk's xfrm_policy can be referred*/ 957 else 958 dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL); 959 if (!IS_ERR(dst)) { 960 skb_dst_set(buff, dst); 961 ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, 962 tclass & ~INET_ECN_MASK, priority); 963 TCP_INC_STATS(net, TCP_MIB_OUTSEGS); 964 if (rst) 965 TCP_INC_STATS(net, TCP_MIB_OUTRSTS); 966 return; 967 } 968 969 kfree_skb(buff); 970 } 971 972 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) 973 { 974 const struct tcphdr *th = tcp_hdr(skb); 975 struct ipv6hdr *ipv6h = ipv6_hdr(skb); 976 u32 seq = 0, ack_seq = 0; 977 struct tcp_md5sig_key *key = NULL; 978 #ifdef CONFIG_TCP_MD5SIG 979 const __u8 *hash_location = NULL; 980 unsigned char newhash[16]; 981 int genhash; 982 struct sock *sk1 = NULL; 983 #endif 984 __be32 label = 0; 985 u32 priority = 0; 986 struct net *net; 987 int oif = 0; 988 989 if (th->rst) 990 return; 991 992 /* If sk not NULL, it means we did a successful lookup and incoming 993 * route had to be correct. prequeue might have dropped our dst. 994 */ 995 if (!sk && !ipv6_unicast_destination(skb)) 996 return; 997 998 net = sk ? 
sock_net(sk) : dev_net(skb_dst(skb)->dev); 999 #ifdef CONFIG_TCP_MD5SIG 1000 rcu_read_lock(); 1001 hash_location = tcp_parse_md5sig_option(th); 1002 if (sk && sk_fullsock(sk)) { 1003 int l3index; 1004 1005 /* sdif set, means packet ingressed via a device 1006 * in an L3 domain and inet_iif is set to it. 1007 */ 1008 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; 1009 key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index); 1010 } else if (hash_location) { 1011 int dif = tcp_v6_iif_l3_slave(skb); 1012 int sdif = tcp_v6_sdif(skb); 1013 int l3index; 1014 1015 /* 1016 * active side is lost. Try to find listening socket through 1017 * source port, and then find md5 key through listening socket. 1018 * we are not loose security here: 1019 * Incoming packet is checked with md5 hash with finding key, 1020 * no RST generated if md5 hash doesn't match. 1021 */ 1022 sk1 = inet6_lookup_listener(net, 1023 &tcp_hashinfo, NULL, 0, 1024 &ipv6h->saddr, 1025 th->source, &ipv6h->daddr, 1026 ntohs(th->source), dif, sdif); 1027 if (!sk1) 1028 goto out; 1029 1030 /* sdif set, means packet ingressed via a device 1031 * in an L3 domain and dif is set to it. 1032 */ 1033 l3index = tcp_v6_sdif(skb) ? dif : 0; 1034 1035 key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index); 1036 if (!key) 1037 goto out; 1038 1039 genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb); 1040 if (genhash || memcmp(hash_location, newhash, 16) != 0) 1041 goto out; 1042 } 1043 #endif 1044 1045 if (th->ack) 1046 seq = ntohl(th->ack_seq); 1047 else 1048 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len - 1049 (th->doff << 2); 1050 1051 if (sk) { 1052 oif = sk->sk_bound_dev_if; 1053 if (sk_fullsock(sk)) { 1054 const struct ipv6_pinfo *np = tcp_inet6_sk(sk); 1055 1056 trace_tcp_send_reset(sk, skb); 1057 if (np->repflow) 1058 label = ip6_flowlabel(ipv6h); 1059 priority = sk->sk_priority; 1060 } 1061 if (sk->sk_state == TCP_TIME_WAIT) { 1062 label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel); 1063 priority = inet_twsk(sk)->tw_priority; 1064 } 1065 } else { 1066 if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET) 1067 label = ip6_flowlabel(ipv6h); 1068 } 1069 1070 tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 1071 ipv6_get_dsfield(ipv6h), label, priority); 1072 1073 #ifdef CONFIG_TCP_MD5SIG 1074 out: 1075 rcu_read_unlock(); 1076 #endif 1077 } 1078 1079 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq, 1080 u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, 1081 struct tcp_md5sig_key *key, u8 tclass, 1082 __be32 label, u32 priority) 1083 { 1084 tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0, 1085 tclass, label, priority); 1086 } 1087 1088 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) 1089 { 1090 struct inet_timewait_sock *tw = inet_twsk(sk); 1091 struct tcp_timewait_sock *tcptw = tcp_twsk(sk); 1092 1093 tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, 1094 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, 1095 tcp_time_stamp_raw() + tcptw->tw_ts_offset, 1096 tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw), 1097 tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority); 1098 1099 inet_twsk_put(tw); 1100 } 1101 1102 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, 1103 struct request_sock *req) 1104 { 1105 int l3index; 1106 1107 l3index = tcp_v6_sdif(skb) ? 
tcp_v6_iif_l3_slave(skb) : 0; 1108 1109 /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV 1110 * sk->sk_state == TCP_SYN_RECV -> for Fast Open. 1111 */ 1112 /* RFC 7323 2.3 1113 * The window field (SEG.WND) of every outgoing segment, with the 1114 * exception of <SYN> segments, MUST be right-shifted by 1115 * Rcv.Wind.Shift bits: 1116 */ 1117 tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ? 1118 tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, 1119 tcp_rsk(req)->rcv_nxt, 1120 req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, 1121 tcp_time_stamp_raw() + tcp_rsk(req)->ts_off, 1122 req->ts_recent, sk->sk_bound_dev_if, 1123 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index), 1124 ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority); 1125 } 1126 1127 1128 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb) 1129 { 1130 #ifdef CONFIG_SYN_COOKIES 1131 const struct tcphdr *th = tcp_hdr(skb); 1132 1133 if (!th->syn) 1134 sk = cookie_v6_check(sk, skb); 1135 #endif 1136 return sk; 1137 } 1138 1139 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph, 1140 struct tcphdr *th, u32 *cookie) 1141 { 1142 u16 mss = 0; 1143 #ifdef CONFIG_SYN_COOKIES 1144 mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops, 1145 &tcp_request_sock_ipv6_ops, sk, th); 1146 if (mss) { 1147 *cookie = __cookie_v6_init_sequence(iph, th, &mss); 1148 tcp_synq_overflow(sk); 1149 } 1150 #endif 1151 return mss; 1152 } 1153 1154 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) 1155 { 1156 if (skb->protocol == htons(ETH_P_IP)) 1157 return tcp_v4_conn_request(sk, skb); 1158 1159 if (!ipv6_unicast_destination(skb)) 1160 goto drop; 1161 1162 if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) { 1163 __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS); 1164 return 0; 1165 } 1166 1167 return tcp_conn_request(&tcp6_request_sock_ops, 1168 &tcp_request_sock_ipv6_ops, sk, skb); 1169 1170 drop: 1171 tcp_listendrop(sk); 1172 return 0; /* don't send reset */ 1173 } 1174 1175 static void tcp_v6_restore_cb(struct sk_buff *skb) 1176 { 1177 /* We need to move header back to the beginning if xfrm6_policy_check() 1178 * and tcp_v6_fill_cb() are going to be called again. 1179 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there. 
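	 * This is the inverse of the memmove() done in tcp_v6_fill_cb().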
1180 */ 1181 memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6, 1182 sizeof(struct inet6_skb_parm)); 1183 } 1184 1185 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, 1186 struct request_sock *req, 1187 struct dst_entry *dst, 1188 struct request_sock *req_unhash, 1189 bool *own_req) 1190 { 1191 struct inet_request_sock *ireq; 1192 struct ipv6_pinfo *newnp; 1193 const struct ipv6_pinfo *np = tcp_inet6_sk(sk); 1194 struct ipv6_txoptions *opt; 1195 struct inet_sock *newinet; 1196 bool found_dup_sk = false; 1197 struct tcp_sock *newtp; 1198 struct sock *newsk; 1199 #ifdef CONFIG_TCP_MD5SIG 1200 struct tcp_md5sig_key *key; 1201 int l3index; 1202 #endif 1203 struct flowi6 fl6; 1204 1205 if (skb->protocol == htons(ETH_P_IP)) { 1206 /* 1207 * v6 mapped 1208 */ 1209 1210 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst, 1211 req_unhash, own_req); 1212 1213 if (!newsk) 1214 return NULL; 1215 1216 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk); 1217 1218 newnp = tcp_inet6_sk(newsk); 1219 newtp = tcp_sk(newsk); 1220 1221 memcpy(newnp, np, sizeof(struct ipv6_pinfo)); 1222 1223 newnp->saddr = newsk->sk_v6_rcv_saddr; 1224 1225 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped; 1226 if (sk_is_mptcp(newsk)) 1227 mptcpv6_handle_mapped(newsk, true); 1228 newsk->sk_backlog_rcv = tcp_v4_do_rcv; 1229 #ifdef CONFIG_TCP_MD5SIG 1230 newtp->af_specific = &tcp_sock_ipv6_mapped_specific; 1231 #endif 1232 1233 newnp->ipv6_mc_list = NULL; 1234 newnp->ipv6_ac_list = NULL; 1235 newnp->ipv6_fl_list = NULL; 1236 newnp->pktoptions = NULL; 1237 newnp->opt = NULL; 1238 newnp->mcast_oif = inet_iif(skb); 1239 newnp->mcast_hops = ip_hdr(skb)->ttl; 1240 newnp->rcv_flowinfo = 0; 1241 if (np->repflow) 1242 newnp->flow_label = 0; 1243 1244 /* 1245 * No need to charge this sock to the relevant IPv6 refcnt debug socks count 1246 * here, tcp_create_openreq_child now does this for us, see the comment in 1247 * that function for the gory details. -acme 1248 */ 1249 1250 /* It is tricky place. Until this moment IPv4 tcp 1251 worked with IPv6 icsk.icsk_af_ops. 1252 Sync it now. 1253 */ 1254 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie); 1255 1256 return newsk; 1257 } 1258 1259 ireq = inet_rsk(req); 1260 1261 if (sk_acceptq_is_full(sk)) 1262 goto out_overflow; 1263 1264 if (!dst) { 1265 dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP); 1266 if (!dst) 1267 goto out; 1268 } 1269 1270 newsk = tcp_create_openreq_child(sk, req, skb); 1271 if (!newsk) 1272 goto out_nonewsk; 1273 1274 /* 1275 * No need to charge this sock to the relevant IPv6 refcnt debug socks 1276 * count here, tcp_create_openreq_child now does this for us, see the 1277 * comment in that function for the gory details. -acme 1278 */ 1279 1280 newsk->sk_gso_type = SKB_GSO_TCPV6; 1281 ip6_dst_store(newsk, dst, NULL, NULL); 1282 inet6_sk_rx_dst_set(newsk, skb); 1283 1284 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk); 1285 1286 newtp = tcp_sk(newsk); 1287 newinet = inet_sk(newsk); 1288 newnp = tcp_inet6_sk(newsk); 1289 1290 memcpy(newnp, np, sizeof(struct ipv6_pinfo)); 1291 1292 newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr; 1293 newnp->saddr = ireq->ir_v6_loc_addr; 1294 newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr; 1295 newsk->sk_bound_dev_if = ireq->ir_iif; 1296 1297 /* Now IPv6 options... 1298 1299 First: no IPv4 options. 
1300 */ 1301 newinet->inet_opt = NULL; 1302 newnp->ipv6_mc_list = NULL; 1303 newnp->ipv6_ac_list = NULL; 1304 newnp->ipv6_fl_list = NULL; 1305 1306 /* Clone RX bits */ 1307 newnp->rxopt.all = np->rxopt.all; 1308 1309 newnp->pktoptions = NULL; 1310 newnp->opt = NULL; 1311 newnp->mcast_oif = tcp_v6_iif(skb); 1312 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; 1313 newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb)); 1314 if (np->repflow) 1315 newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb)); 1316 1317 /* Set ToS of the new socket based upon the value of incoming SYN. 1318 * ECT bits are set later in tcp_init_transfer(). 1319 */ 1320 if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)) 1321 newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK; 1322 1323 /* Clone native IPv6 options from listening socket (if any) 1324 1325 Yes, keeping reference count would be much more clever, 1326 but we make one more one thing there: reattach optmem 1327 to newsk. 1328 */ 1329 opt = ireq->ipv6_opt; 1330 if (!opt) 1331 opt = rcu_dereference(np->opt); 1332 if (opt) { 1333 opt = ipv6_dup_options(newsk, opt); 1334 RCU_INIT_POINTER(newnp->opt, opt); 1335 } 1336 inet_csk(newsk)->icsk_ext_hdr_len = 0; 1337 if (opt) 1338 inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen + 1339 opt->opt_flen; 1340 1341 tcp_ca_openreq_child(newsk, dst); 1342 1343 tcp_sync_mss(newsk, dst_mtu(dst)); 1344 newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst)); 1345 1346 tcp_initialize_rcv_mss(newsk); 1347 1348 newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6; 1349 newinet->inet_rcv_saddr = LOOPBACK4_IPV6; 1350 1351 #ifdef CONFIG_TCP_MD5SIG 1352 l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif); 1353 1354 /* Copy over the MD5 key from the original socket */ 1355 key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index); 1356 if (key) { 1357 /* We're using one, so create a matching key 1358 * on the newsk structure. If we fail to get 1359 * memory, then we end up not copying the key 1360 * across. Shucks. 1361 */ 1362 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr, 1363 AF_INET6, 128, l3index, key->flags, key->key, key->keylen, 1364 sk_gfp_mask(sk, GFP_ATOMIC)); 1365 } 1366 #endif 1367 1368 if (__inet_inherit_port(sk, newsk) < 0) { 1369 inet_csk_prepare_forced_close(newsk); 1370 tcp_done(newsk); 1371 goto out; 1372 } 1373 *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), 1374 &found_dup_sk); 1375 if (*own_req) { 1376 tcp_move_syn(newtp, req); 1377 1378 /* Clone pktoptions received with SYN, if we own the req */ 1379 if (ireq->pktopts) { 1380 newnp->pktoptions = skb_clone(ireq->pktopts, 1381 sk_gfp_mask(sk, GFP_ATOMIC)); 1382 consume_skb(ireq->pktopts); 1383 ireq->pktopts = NULL; 1384 if (newnp->pktoptions) { 1385 tcp_v6_restore_cb(newnp->pktoptions); 1386 skb_set_owner_r(newnp->pktoptions, newsk); 1387 } 1388 } 1389 } else { 1390 if (!req_unhash && found_dup_sk) { 1391 /* This code path should only be executed in the 1392 * syncookie case only 1393 */ 1394 bh_unlock_sock(newsk); 1395 sock_put(newsk); 1396 newsk = NULL; 1397 } 1398 } 1399 1400 return newsk; 1401 1402 out_overflow: 1403 __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); 1404 out_nonewsk: 1405 dst_release(dst); 1406 out: 1407 tcp_listendrop(sk); 1408 return NULL; 1409 } 1410 1411 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *, 1412 u32)); 1413 /* The socket must have it's spinlock held when we get 1414 * here, unless it is a TCP_LISTEN socket. 
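 * (TCP_LISTEN sockets are passed to us directly from tcp_v6_rcv() without
 * bh_lock_sock() being taken first.)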
1415 * 1416 * We have a potential double-lock case here, so even when 1417 * doing backlog processing we use the BH locking scheme. 1418 * This is because we cannot sleep with the original spinlock 1419 * held. 1420 */ 1421 INDIRECT_CALLABLE_SCOPE 1422 int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) 1423 { 1424 struct ipv6_pinfo *np = tcp_inet6_sk(sk); 1425 struct sk_buff *opt_skb = NULL; 1426 enum skb_drop_reason reason; 1427 struct tcp_sock *tp; 1428 1429 /* Imagine: socket is IPv6. IPv4 packet arrives, 1430 goes to IPv4 receive handler and backlogged. 1431 From backlog it always goes here. Kerboom... 1432 Fortunately, tcp_rcv_established and rcv_established 1433 handle them correctly, but it is not case with 1434 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK 1435 */ 1436 1437 if (skb->protocol == htons(ETH_P_IP)) 1438 return tcp_v4_do_rcv(sk, skb); 1439 1440 /* 1441 * socket locking is here for SMP purposes as backlog rcv 1442 * is currently called with bh processing disabled. 1443 */ 1444 1445 /* Do Stevens' IPV6_PKTOPTIONS. 1446 1447 Yes, guys, it is the only place in our code, where we 1448 may make it not affecting IPv4. 1449 The rest of code is protocol independent, 1450 and I do not like idea to uglify IPv4. 1451 1452 Actually, all the idea behind IPV6_PKTOPTIONS 1453 looks not very well thought. For now we latch 1454 options, received in the last packet, enqueued 1455 by tcp. Feel free to propose better solution. 1456 --ANK (980728) 1457 */ 1458 if (np->rxopt.all) 1459 opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC)); 1460 1461 reason = SKB_DROP_REASON_NOT_SPECIFIED; 1462 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ 1463 struct dst_entry *dst; 1464 1465 dst = rcu_dereference_protected(sk->sk_rx_dst, 1466 lockdep_sock_is_held(sk)); 1467 1468 sock_rps_save_rxhash(sk, skb); 1469 sk_mark_napi_id(sk, skb); 1470 if (dst) { 1471 if (sk->sk_rx_dst_ifindex != skb->skb_iif || 1472 INDIRECT_CALL_1(dst->ops->check, ip6_dst_check, 1473 dst, sk->sk_rx_dst_cookie) == NULL) { 1474 RCU_INIT_POINTER(sk->sk_rx_dst, NULL); 1475 dst_release(dst); 1476 } 1477 } 1478 1479 tcp_rcv_established(sk, skb); 1480 if (opt_skb) 1481 goto ipv6_pktoptions; 1482 return 0; 1483 } 1484 1485 if (tcp_checksum_complete(skb)) 1486 goto csum_err; 1487 1488 if (sk->sk_state == TCP_LISTEN) { 1489 struct sock *nsk = tcp_v6_cookie_check(sk, skb); 1490 1491 if (!nsk) 1492 goto discard; 1493 1494 if (nsk != sk) { 1495 if (tcp_child_process(sk, nsk, skb)) 1496 goto reset; 1497 if (opt_skb) 1498 __kfree_skb(opt_skb); 1499 return 0; 1500 } 1501 } else 1502 sock_rps_save_rxhash(sk, skb); 1503 1504 if (tcp_rcv_state_process(sk, skb)) 1505 goto reset; 1506 if (opt_skb) 1507 goto ipv6_pktoptions; 1508 return 0; 1509 1510 reset: 1511 tcp_v6_send_reset(sk, skb); 1512 discard: 1513 if (opt_skb) 1514 __kfree_skb(opt_skb); 1515 kfree_skb_reason(skb, reason); 1516 return 0; 1517 csum_err: 1518 reason = SKB_DROP_REASON_TCP_CSUM; 1519 trace_tcp_bad_csum(skb); 1520 TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS); 1521 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); 1522 goto discard; 1523 1524 1525 ipv6_pktoptions: 1526 /* Do you ask, what is it? 1527 1528 1. skb was enqueued by tcp. 1529 2. skb is added to tail of read queue, rather than out of order. 1530 3. socket is not in passive state. 1531 4. Finally, it really contains options, which user wants to receive. 
1532 */ 1533 tp = tcp_sk(sk); 1534 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt && 1535 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) { 1536 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo) 1537 np->mcast_oif = tcp_v6_iif(opt_skb); 1538 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) 1539 np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit; 1540 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass) 1541 np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb)); 1542 if (np->repflow) 1543 np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb)); 1544 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) { 1545 skb_set_owner_r(opt_skb, sk); 1546 tcp_v6_restore_cb(opt_skb); 1547 opt_skb = xchg(&np->pktoptions, opt_skb); 1548 } else { 1549 __kfree_skb(opt_skb); 1550 opt_skb = xchg(&np->pktoptions, NULL); 1551 } 1552 } 1553 1554 consume_skb(opt_skb); 1555 return 0; 1556 } 1557 1558 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr, 1559 const struct tcphdr *th) 1560 { 1561 /* This is tricky: we move IP6CB at its correct location into 1562 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because 1563 * _decode_session6() uses IP6CB(). 1564 * barrier() makes sure compiler won't play aliasing games. 1565 */ 1566 memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb), 1567 sizeof(struct inet6_skb_parm)); 1568 barrier(); 1569 1570 TCP_SKB_CB(skb)->seq = ntohl(th->seq); 1571 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + 1572 skb->len - th->doff*4); 1573 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); 1574 TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th); 1575 TCP_SKB_CB(skb)->tcp_tw_isn = 0; 1576 TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr); 1577 TCP_SKB_CB(skb)->sacked = 0; 1578 TCP_SKB_CB(skb)->has_rxtstamp = 1579 skb->tstamp || skb_hwtstamps(skb)->hwtstamp; 1580 } 1581 1582 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) 1583 { 1584 enum skb_drop_reason drop_reason; 1585 int sdif = inet6_sdif(skb); 1586 int dif = inet6_iif(skb); 1587 const struct tcphdr *th; 1588 const struct ipv6hdr *hdr; 1589 bool refcounted; 1590 struct sock *sk; 1591 int ret; 1592 struct net *net = dev_net(skb->dev); 1593 1594 drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; 1595 if (skb->pkt_type != PACKET_HOST) 1596 goto discard_it; 1597 1598 /* 1599 * Count it even if it's bad. 
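	 * (i.e. TCP_MIB_INSEGS is bumped before any header or checksum
	 * validation, so malformed segments are included in the count)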
1600 */ 1601 __TCP_INC_STATS(net, TCP_MIB_INSEGS); 1602 1603 if (!pskb_may_pull(skb, sizeof(struct tcphdr))) 1604 goto discard_it; 1605 1606 th = (const struct tcphdr *)skb->data; 1607 1608 if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) { 1609 drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL; 1610 goto bad_packet; 1611 } 1612 if (!pskb_may_pull(skb, th->doff*4)) 1613 goto discard_it; 1614 1615 if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo)) 1616 goto csum_error; 1617 1618 th = (const struct tcphdr *)skb->data; 1619 hdr = ipv6_hdr(skb); 1620 1621 lookup: 1622 sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), 1623 th->source, th->dest, inet6_iif(skb), sdif, 1624 &refcounted); 1625 if (!sk) 1626 goto no_tcp_socket; 1627 1628 process: 1629 if (sk->sk_state == TCP_TIME_WAIT) 1630 goto do_time_wait; 1631 1632 if (sk->sk_state == TCP_NEW_SYN_RECV) { 1633 struct request_sock *req = inet_reqsk(sk); 1634 bool req_stolen = false; 1635 struct sock *nsk; 1636 1637 sk = req->rsk_listener; 1638 drop_reason = tcp_inbound_md5_hash(sk, skb, 1639 &hdr->saddr, &hdr->daddr, 1640 AF_INET6, dif, sdif); 1641 if (drop_reason) { 1642 sk_drops_add(sk, skb); 1643 reqsk_put(req); 1644 goto discard_it; 1645 } 1646 if (tcp_checksum_complete(skb)) { 1647 reqsk_put(req); 1648 goto csum_error; 1649 } 1650 if (unlikely(sk->sk_state != TCP_LISTEN)) { 1651 nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb); 1652 if (!nsk) { 1653 inet_csk_reqsk_queue_drop_and_put(sk, req); 1654 goto lookup; 1655 } 1656 sk = nsk; 1657 /* reuseport_migrate_sock() has already held one sk_refcnt 1658 * before returning. 1659 */ 1660 } else { 1661 sock_hold(sk); 1662 } 1663 refcounted = true; 1664 nsk = NULL; 1665 if (!tcp_filter(sk, skb)) { 1666 th = (const struct tcphdr *)skb->data; 1667 hdr = ipv6_hdr(skb); 1668 tcp_v6_fill_cb(skb, hdr, th); 1669 nsk = tcp_check_req(sk, skb, req, false, &req_stolen); 1670 } else { 1671 drop_reason = SKB_DROP_REASON_SOCKET_FILTER; 1672 } 1673 if (!nsk) { 1674 reqsk_put(req); 1675 if (req_stolen) { 1676 /* Another cpu got exclusive access to req 1677 * and created a full blown socket. 1678 * Try to feed this packet to this socket 1679 * instead of discarding it. 
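				 * tcp_v6_restore_cb() puts IP6CB back in
				 * place before the lookup is retried against
				 * the new full socket.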
1680 */ 1681 tcp_v6_restore_cb(skb); 1682 sock_put(sk); 1683 goto lookup; 1684 } 1685 goto discard_and_relse; 1686 } 1687 if (nsk == sk) { 1688 reqsk_put(req); 1689 tcp_v6_restore_cb(skb); 1690 } else if (tcp_child_process(sk, nsk, skb)) { 1691 tcp_v6_send_reset(nsk, skb); 1692 goto discard_and_relse; 1693 } else { 1694 sock_put(sk); 1695 return 0; 1696 } 1697 } 1698 1699 if (static_branch_unlikely(&ip6_min_hopcount)) { 1700 /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */ 1701 if (hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) { 1702 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); 1703 goto discard_and_relse; 1704 } 1705 } 1706 1707 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) { 1708 drop_reason = SKB_DROP_REASON_XFRM_POLICY; 1709 goto discard_and_relse; 1710 } 1711 1712 drop_reason = tcp_inbound_md5_hash(sk, skb, &hdr->saddr, &hdr->daddr, 1713 AF_INET6, dif, sdif); 1714 if (drop_reason) 1715 goto discard_and_relse; 1716 1717 if (tcp_filter(sk, skb)) { 1718 drop_reason = SKB_DROP_REASON_SOCKET_FILTER; 1719 goto discard_and_relse; 1720 } 1721 th = (const struct tcphdr *)skb->data; 1722 hdr = ipv6_hdr(skb); 1723 tcp_v6_fill_cb(skb, hdr, th); 1724 1725 skb->dev = NULL; 1726 1727 if (sk->sk_state == TCP_LISTEN) { 1728 ret = tcp_v6_do_rcv(sk, skb); 1729 goto put_and_return; 1730 } 1731 1732 sk_incoming_cpu_update(sk); 1733 1734 bh_lock_sock_nested(sk); 1735 tcp_segs_in(tcp_sk(sk), skb); 1736 ret = 0; 1737 if (!sock_owned_by_user(sk)) { 1738 ret = tcp_v6_do_rcv(sk, skb); 1739 } else { 1740 if (tcp_add_backlog(sk, skb, &drop_reason)) 1741 goto discard_and_relse; 1742 } 1743 bh_unlock_sock(sk); 1744 put_and_return: 1745 if (refcounted) 1746 sock_put(sk); 1747 return ret ? -1 : 0; 1748 1749 no_tcp_socket: 1750 drop_reason = SKB_DROP_REASON_NO_SOCKET; 1751 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) 1752 goto discard_it; 1753 1754 tcp_v6_fill_cb(skb, hdr, th); 1755 1756 if (tcp_checksum_complete(skb)) { 1757 csum_error: 1758 drop_reason = SKB_DROP_REASON_TCP_CSUM; 1759 trace_tcp_bad_csum(skb); 1760 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS); 1761 bad_packet: 1762 __TCP_INC_STATS(net, TCP_MIB_INERRS); 1763 } else { 1764 tcp_v6_send_reset(NULL, skb); 1765 } 1766 1767 discard_it: 1768 SKB_DR_OR(drop_reason, NOT_SPECIFIED); 1769 kfree_skb_reason(skb, drop_reason); 1770 return 0; 1771 1772 discard_and_relse: 1773 sk_drops_add(sk, skb); 1774 if (refcounted) 1775 sock_put(sk); 1776 goto discard_it; 1777 1778 do_time_wait: 1779 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { 1780 drop_reason = SKB_DROP_REASON_XFRM_POLICY; 1781 inet_twsk_put(inet_twsk(sk)); 1782 goto discard_it; 1783 } 1784 1785 tcp_v6_fill_cb(skb, hdr, th); 1786 1787 if (tcp_checksum_complete(skb)) { 1788 inet_twsk_put(inet_twsk(sk)); 1789 goto csum_error; 1790 } 1791 1792 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { 1793 case TCP_TW_SYN: 1794 { 1795 struct sock *sk2; 1796 1797 sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo, 1798 skb, __tcp_hdrlen(th), 1799 &ipv6_hdr(skb)->saddr, th->source, 1800 &ipv6_hdr(skb)->daddr, 1801 ntohs(th->dest), 1802 tcp_v6_iif_l3_slave(skb), 1803 sdif); 1804 if (sk2) { 1805 struct inet_timewait_sock *tw = inet_twsk(sk); 1806 inet_twsk_deschedule_put(tw); 1807 sk = sk2; 1808 tcp_v6_restore_cb(skb); 1809 refcounted = false; 1810 goto process; 1811 } 1812 } 1813 /* to ACK */ 1814 fallthrough; 1815 case TCP_TW_ACK: 1816 tcp_v6_timewait_ack(sk, skb); 1817 break; 1818 case TCP_TW_RST: 1819 tcp_v6_send_reset(sk, skb); 1820 
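		/* Once the RST has been sent the timewait socket has served
		 * its purpose, so remove it now instead of waiting for the
		 * timewait timer to expire.
		 */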
inet_twsk_deschedule_put(inet_twsk(sk)); 1821 goto discard_it; 1822 case TCP_TW_SUCCESS: 1823 ; 1824 } 1825 goto discard_it; 1826 } 1827 1828 void tcp_v6_early_demux(struct sk_buff *skb) 1829 { 1830 const struct ipv6hdr *hdr; 1831 const struct tcphdr *th; 1832 struct sock *sk; 1833 1834 if (skb->pkt_type != PACKET_HOST) 1835 return; 1836 1837 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr))) 1838 return; 1839 1840 hdr = ipv6_hdr(skb); 1841 th = tcp_hdr(skb); 1842 1843 if (th->doff < sizeof(struct tcphdr) / 4) 1844 return; 1845 1846 /* Note : We use inet6_iif() here, not tcp_v6_iif() */ 1847 sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo, 1848 &hdr->saddr, th->source, 1849 &hdr->daddr, ntohs(th->dest), 1850 inet6_iif(skb), inet6_sdif(skb)); 1851 if (sk) { 1852 skb->sk = sk; 1853 skb->destructor = sock_edemux; 1854 if (sk_fullsock(sk)) { 1855 struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst); 1856 1857 if (dst) 1858 dst = dst_check(dst, sk->sk_rx_dst_cookie); 1859 if (dst && 1860 sk->sk_rx_dst_ifindex == skb->skb_iif) 1861 skb_dst_set_noref(skb, dst); 1862 } 1863 } 1864 } 1865 1866 static struct timewait_sock_ops tcp6_timewait_sock_ops = { 1867 .twsk_obj_size = sizeof(struct tcp6_timewait_sock), 1868 .twsk_unique = tcp_twsk_unique, 1869 .twsk_destructor = tcp_twsk_destructor, 1870 }; 1871 1872 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb) 1873 { 1874 __tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr); 1875 } 1876 1877 const struct inet_connection_sock_af_ops ipv6_specific = { 1878 .queue_xmit = inet6_csk_xmit, 1879 .send_check = tcp_v6_send_check, 1880 .rebuild_header = inet6_sk_rebuild_header, 1881 .sk_rx_dst_set = inet6_sk_rx_dst_set, 1882 .conn_request = tcp_v6_conn_request, 1883 .syn_recv_sock = tcp_v6_syn_recv_sock, 1884 .net_header_len = sizeof(struct ipv6hdr), 1885 .net_frag_header_len = sizeof(struct frag_hdr), 1886 .setsockopt = ipv6_setsockopt, 1887 .getsockopt = ipv6_getsockopt, 1888 .addr2sockaddr = inet6_csk_addr2sockaddr, 1889 .sockaddr_len = sizeof(struct sockaddr_in6), 1890 .mtu_reduced = tcp_v6_mtu_reduced, 1891 }; 1892 1893 #ifdef CONFIG_TCP_MD5SIG 1894 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = { 1895 .md5_lookup = tcp_v6_md5_lookup, 1896 .calc_md5_hash = tcp_v6_md5_hash_skb, 1897 .md5_parse = tcp_v6_parse_md5_keys, 1898 }; 1899 #endif 1900 1901 /* 1902 * TCP over IPv4 via INET6 API 1903 */ 1904 static const struct inet_connection_sock_af_ops ipv6_mapped = { 1905 .queue_xmit = ip_queue_xmit, 1906 .send_check = tcp_v4_send_check, 1907 .rebuild_header = inet_sk_rebuild_header, 1908 .sk_rx_dst_set = inet_sk_rx_dst_set, 1909 .conn_request = tcp_v6_conn_request, 1910 .syn_recv_sock = tcp_v6_syn_recv_sock, 1911 .net_header_len = sizeof(struct iphdr), 1912 .setsockopt = ipv6_setsockopt, 1913 .getsockopt = ipv6_getsockopt, 1914 .addr2sockaddr = inet6_csk_addr2sockaddr, 1915 .sockaddr_len = sizeof(struct sockaddr_in6), 1916 .mtu_reduced = tcp_v4_mtu_reduced, 1917 }; 1918 1919 #ifdef CONFIG_TCP_MD5SIG 1920 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = { 1921 .md5_lookup = tcp_v4_md5_lookup, 1922 .calc_md5_hash = tcp_v4_md5_hash_skb, 1923 .md5_parse = tcp_v6_parse_md5_keys, 1924 }; 1925 #endif 1926 1927 /* NOTE: A lot of things set to zero explicitly by call to 1928 * sk_alloc() so need not be done here. 
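 * tcp_v6_init_sock() below therefore only installs the IPv6 af_ops (and,
 * under CONFIG_TCP_MD5SIG, the IPv6 MD5 ops) on top of the generic
 * tcp_init_sock() initialisation.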
1929 */ 1930 static int tcp_v6_init_sock(struct sock *sk) 1931 { 1932 struct inet_connection_sock *icsk = inet_csk(sk); 1933 1934 tcp_init_sock(sk); 1935 1936 icsk->icsk_af_ops = &ipv6_specific; 1937 1938 #ifdef CONFIG_TCP_MD5SIG 1939 tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific; 1940 #endif 1941 1942 return 0; 1943 } 1944 1945 static void tcp_v6_destroy_sock(struct sock *sk) 1946 { 1947 tcp_v4_destroy_sock(sk); 1948 inet6_destroy_sock(sk); 1949 } 1950 1951 #ifdef CONFIG_PROC_FS 1952 /* Proc filesystem TCPv6 sock list dumping. */ 1953 static void get_openreq6(struct seq_file *seq, 1954 const struct request_sock *req, int i) 1955 { 1956 long ttd = req->rsk_timer.expires - jiffies; 1957 const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr; 1958 const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr; 1959 1960 if (ttd < 0) 1961 ttd = 0; 1962 1963 seq_printf(seq, 1964 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " 1965 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n", 1966 i, 1967 src->s6_addr32[0], src->s6_addr32[1], 1968 src->s6_addr32[2], src->s6_addr32[3], 1969 inet_rsk(req)->ir_num, 1970 dest->s6_addr32[0], dest->s6_addr32[1], 1971 dest->s6_addr32[2], dest->s6_addr32[3], 1972 ntohs(inet_rsk(req)->ir_rmt_port), 1973 TCP_SYN_RECV, 1974 0, 0, /* could print option size, but that is af dependent. */ 1975 1, /* timers active (only the expire timer) */ 1976 jiffies_to_clock_t(ttd), 1977 req->num_timeout, 1978 from_kuid_munged(seq_user_ns(seq), 1979 sock_i_uid(req->rsk_listener)), 1980 0, /* non standard timer */ 1981 0, /* open_requests have no inode */ 1982 0, req); 1983 } 1984 1985 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) 1986 { 1987 const struct in6_addr *dest, *src; 1988 __u16 destp, srcp; 1989 int timer_active; 1990 unsigned long timer_expires; 1991 const struct inet_sock *inet = inet_sk(sp); 1992 const struct tcp_sock *tp = tcp_sk(sp); 1993 const struct inet_connection_sock *icsk = inet_csk(sp); 1994 const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq; 1995 int rx_queue; 1996 int state; 1997 1998 dest = &sp->sk_v6_daddr; 1999 src = &sp->sk_v6_rcv_saddr; 2000 destp = ntohs(inet->inet_dport); 2001 srcp = ntohs(inet->inet_sport); 2002 2003 if (icsk->icsk_pending == ICSK_TIME_RETRANS || 2004 icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT || 2005 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { 2006 timer_active = 1; 2007 timer_expires = icsk->icsk_timeout; 2008 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { 2009 timer_active = 4; 2010 timer_expires = icsk->icsk_timeout; 2011 } else if (timer_pending(&sp->sk_timer)) { 2012 timer_active = 2; 2013 timer_expires = sp->sk_timer.expires; 2014 } else { 2015 timer_active = 0; 2016 timer_expires = jiffies; 2017 } 2018 2019 state = inet_sk_state_load(sp); 2020 if (state == TCP_LISTEN) 2021 rx_queue = READ_ONCE(sp->sk_ack_backlog); 2022 else 2023 /* Because we don't lock the socket, 2024 * we might find a transient negative value. 
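		 * max_t() below clamps any such negative difference to zero.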
		rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
				      READ_ONCE(tp->copied_seq), 0);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   state,
		   READ_ONCE(tp->write_seq) - tp->snd_una,
		   rx_queue,
		   timer_active,
		   jiffies_delta_to_clock_t(timer_expires - jiffies),
		   icsk->icsk_retransmits,
		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
		   icsk->icsk_probes_out,
		   sock_i_ino(sp),
		   refcount_read(&sp->sk_refcnt), sp,
		   jiffies_to_clock_t(icsk->icsk_rto),
		   jiffies_to_clock_t(icsk->icsk_ack.ato),
		   (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
		   tcp_snd_cwnd(tp),
		   state == TCP_LISTEN ?
			fastopenq->max_qlen :
			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
		   );
}

static void get_timewait6_sock(struct seq_file *seq,
			       struct inet_timewait_sock *tw, int i)
{
	long delta = tw->tw_timer.expires - jiffies;
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;

	dest = &tw->tw_v6_daddr;
	src  = &tw->tw_v6_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   tw->tw_substate, 0, 0,
		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
		   refcount_read(&tw->tw_refcnt), tw);
}

static int tcp6_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
	struct sock *sk = v;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "  sl  "
			 "local_address                         "
			 "remote_address                        "
			 "st tx_queue rx_queue tr tm->when retrnsmt"
			 "   uid  timeout inode\n");
		goto out;
	}
	st = seq->private;

	if (sk->sk_state == TCP_TIME_WAIT)
		get_timewait6_sock(seq, v, st->num);
	else if (sk->sk_state == TCP_NEW_SYN_RECV)
		get_openreq6(seq, v, st->num);
	else
		get_tcp6_sock(seq, v, st->num);
out:
	return 0;
}

static const struct seq_operations tcp6_seq_ops = {
	.show		= tcp6_seq_show,
	.start		= tcp_seq_start,
	.next		= tcp_seq_next,
	.stop		= tcp_seq_stop,
};

static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.family		= AF_INET6,
};

int __net_init tcp6_proc_init(struct net *net)
{
	if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
			sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
		return -ENOMEM;
	return 0;
}

void tcp6_proc_exit(struct net *net)
{
	remove_proc_entry("tcp6", net->proc_net);
}
#endif

struct proto tcpv6_prot = {
	.name			= "TCPv6",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.pre_connect		= tcp_v6_pre_connect,
	.connect		= tcp_v6_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v6_init_sock,
	.destroy		= tcp_v6_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.bpf_bypass_getsockopt	= tcp_bpf_bypass_getsockopt,
	.keepalive		= tcp_set_keepalive,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.sendpage		= tcp_sendpage,
	.backlog_rcv		= tcp_v6_do_rcv,
	.release_cb		= tcp_release_cb,
	.hash			= inet6_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.put_port		= inet_put_port,
#ifdef CONFIG_BPF_SYSCALL
	.psock_update_sk_prot	= tcp_bpf_update_proto,
#endif
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.leave_memory_pressure	= tcp_leave_memory_pressure,
	.stream_memory_free	= tcp_stream_memory_free,
	.sockets_allocated	= &tcp_sockets_allocated,

	.memory_allocated	= &tcp_memory_allocated,
	.per_cpu_fw_alloc	= &tcp_memory_per_cpu_fw_alloc,

	.memory_pressure	= &tcp_memory_pressure,
	.orphan_count		= &tcp_orphan_count,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp6_sock),
	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
	.twsk_prot		= &tcp6_timewait_sock_ops,
	.rsk_prot		= &tcp6_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
	.no_autobind		= true,
	.diag_destroy		= tcp_abort,
};
EXPORT_SYMBOL_GPL(tcpv6_prot);

static const struct inet6_protocol tcpv6_protocol = {
	.handler	=	tcp_v6_rcv,
	.err_handler	=	tcp_v6_err,
	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};

static struct inet_protosw tcpv6_protosw = {
	.type		=	SOCK_STREAM,
	.protocol	=	IPPROTO_TCP,
	.prot		=	&tcpv6_prot,
	.ops		=	&inet6_stream_ops,
	.flags		=	INET_PROTOSW_PERMANENT |
				INET_PROTOSW_ICSK,
};

static int __net_init tcpv6_net_init(struct net *net)
{
	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
				    SOCK_RAW, IPPROTO_TCP, net);
}

static void __net_exit tcpv6_net_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
}

static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
{
	inet_twsk_purge(&tcp_hashinfo, AF_INET6);
}

static struct pernet_operations tcpv6_net_ops = {
	.init	    = tcpv6_net_init,
	.exit	    = tcpv6_net_exit,
	.exit_batch = tcpv6_net_exit_batch,
};

int __init tcpv6_init(void)
{
	int ret;

	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
	if (ret)
		goto out;

	/* register inet6 protocol */
	ret = inet6_register_protosw(&tcpv6_protosw);
	if (ret)
		goto out_tcpv6_protocol;

	ret = register_pernet_subsys(&tcpv6_net_ops);
	if (ret)
		goto out_tcpv6_protosw;

	ret = mptcpv6_init();
	if (ret)
		goto out_tcpv6_pernet_subsys;

out:
	return ret;

out_tcpv6_pernet_subsys:
	unregister_pernet_subsys(&tcpv6_net_ops);
out_tcpv6_protosw:
	inet6_unregister_protosw(&tcpv6_protosw);
out_tcpv6_protocol:
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
	goto out;
}

void tcpv6_exit(void)
{
	unregister_pernet_subsys(&tcpv6_net_ops);
	inet6_unregister_protosw(&tcpv6_protosw);
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
}
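/*
 * Usage note: tcpv6_init() and tcpv6_exit() are not wired up as
 * module_init()/module_exit() hooks here; the IPv6 core (typically
 * inet6_init() in net/ipv6/af_inet6.c) is expected to call tcpv6_init()
 * during protocol bring-up and tcpv6_exit() when unwinding a failed
 * initialization.
 */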