// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	TCP over IPv6
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on:
 *	linux/net/ipv4/tcp.c
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allows both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 */

#include <linux/bottom_half.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
#include <linux/indirect_call_wrapper.h>

#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/inet6_connection_sock.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/snmp.h>
#include <net/dsfield.h>
#include <net/timewait_sock.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>
#include <net/busy_poll.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <crypto/hash.h>
#include <linux/scatterlist.h>

#include <trace/events/tcp.h>

static void	tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
static void	tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				      struct request_sock *req);

INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);

static const struct inet_connection_sock_af_ops ipv6_mapped;
const struct inet_connection_sock_af_ops ipv6_specific;
#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
#else
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
						   const struct in6_addr *addr,
						   int l3index)
{
	return NULL;
}
#endif

/* Helper returning the inet6 address from a given tcp socket.
 * It can be used in TCP stack instead of inet6_sk(sk).
 * This avoids a dereference and allows compiler optimizations.
 * It is a specialized version of inet6_sk_generic().
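 *
 * In short: struct tcp6_sock embeds struct ipv6_pinfo as its last member,
 * so the pinfo sits at a constant offset
 * (sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo)) from the start of
 * the socket, which is exactly what the helper below computes.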
95 */ 96 static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk) 97 { 98 unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo); 99 100 return (struct ipv6_pinfo *)(((u8 *)sk) + offset); 101 } 102 103 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) 104 { 105 struct dst_entry *dst = skb_dst(skb); 106 107 if (dst && dst_hold_safe(dst)) { 108 const struct rt6_info *rt = (const struct rt6_info *)dst; 109 110 rcu_assign_pointer(sk->sk_rx_dst, dst); 111 sk->sk_rx_dst_ifindex = skb->skb_iif; 112 sk->sk_rx_dst_cookie = rt6_get_cookie(rt); 113 } 114 } 115 116 static u32 tcp_v6_init_seq(const struct sk_buff *skb) 117 { 118 return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32, 119 ipv6_hdr(skb)->saddr.s6_addr32, 120 tcp_hdr(skb)->dest, 121 tcp_hdr(skb)->source); 122 } 123 124 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb) 125 { 126 return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32, 127 ipv6_hdr(skb)->saddr.s6_addr32); 128 } 129 130 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr, 131 int addr_len) 132 { 133 /* This check is replicated from tcp_v6_connect() and intended to 134 * prevent BPF program called below from accessing bytes that are out 135 * of the bound specified by user in addr_len. 136 */ 137 if (addr_len < SIN6_LEN_RFC2133) 138 return -EINVAL; 139 140 sock_owned_by_me(sk); 141 142 return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr); 143 } 144 145 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, 146 int addr_len) 147 { 148 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; 149 struct inet_sock *inet = inet_sk(sk); 150 struct inet_connection_sock *icsk = inet_csk(sk); 151 struct inet_timewait_death_row *tcp_death_row; 152 struct ipv6_pinfo *np = tcp_inet6_sk(sk); 153 struct tcp_sock *tp = tcp_sk(sk); 154 struct in6_addr *saddr = NULL, *final_p, final; 155 struct ipv6_txoptions *opt; 156 struct flowi6 fl6; 157 struct dst_entry *dst; 158 int addr_type; 159 int err; 160 161 if (addr_len < SIN6_LEN_RFC2133) 162 return -EINVAL; 163 164 if (usin->sin6_family != AF_INET6) 165 return -EAFNOSUPPORT; 166 167 memset(&fl6, 0, sizeof(fl6)); 168 169 if (np->sndflow) { 170 fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK; 171 IP6_ECN_flow_init(fl6.flowlabel); 172 if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { 173 struct ip6_flowlabel *flowlabel; 174 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); 175 if (IS_ERR(flowlabel)) 176 return -EINVAL; 177 fl6_sock_release(flowlabel); 178 } 179 } 180 181 /* 182 * connect() to INADDR_ANY means loopback (BSD'ism). 183 */ 184 185 if (ipv6_addr_any(&usin->sin6_addr)) { 186 if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) 187 ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK), 188 &usin->sin6_addr); 189 else 190 usin->sin6_addr = in6addr_loopback; 191 } 192 193 addr_type = ipv6_addr_type(&usin->sin6_addr); 194 195 if (addr_type & IPV6_ADDR_MULTICAST) 196 return -ENETUNREACH; 197 198 if (addr_type&IPV6_ADDR_LINKLOCAL) { 199 if (addr_len >= sizeof(struct sockaddr_in6) && 200 usin->sin6_scope_id) { 201 /* If interface is set while binding, indices 202 * must coincide. 
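				 *
				 * For illustration only (a userspace sketch,
				 * not part of this file): connecting to a
				 * link-local peer supplies the interface via
				 * sin6_scope_id, e.g.
				 *
				 *	struct sockaddr_in6 a = {
				 *		.sin6_family = AF_INET6,
				 *		.sin6_port = htons(80),
				 *		.sin6_scope_id = if_nametoindex("eth0"),
				 *	};
				 *	inet_pton(AF_INET6, "fe80::1", &a.sin6_addr);
				 *	connect(fd, (struct sockaddr *)&a, sizeof(a));
				 *
				 * "eth0", the port and the address are made-up
				 * values; the point is that the scope id must
				 * match any device the socket is already bound to.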
203 */ 204 if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id)) 205 return -EINVAL; 206 207 sk->sk_bound_dev_if = usin->sin6_scope_id; 208 } 209 210 /* Connect to link-local address requires an interface */ 211 if (!sk->sk_bound_dev_if) 212 return -EINVAL; 213 } 214 215 if (tp->rx_opt.ts_recent_stamp && 216 !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) { 217 tp->rx_opt.ts_recent = 0; 218 tp->rx_opt.ts_recent_stamp = 0; 219 WRITE_ONCE(tp->write_seq, 0); 220 } 221 222 sk->sk_v6_daddr = usin->sin6_addr; 223 np->flow_label = fl6.flowlabel; 224 225 /* 226 * TCP over IPv4 227 */ 228 229 if (addr_type & IPV6_ADDR_MAPPED) { 230 u32 exthdrlen = icsk->icsk_ext_hdr_len; 231 struct sockaddr_in sin; 232 233 if (ipv6_only_sock(sk)) 234 return -ENETUNREACH; 235 236 sin.sin_family = AF_INET; 237 sin.sin_port = usin->sin6_port; 238 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3]; 239 240 icsk->icsk_af_ops = &ipv6_mapped; 241 if (sk_is_mptcp(sk)) 242 mptcpv6_handle_mapped(sk, true); 243 sk->sk_backlog_rcv = tcp_v4_do_rcv; 244 #ifdef CONFIG_TCP_MD5SIG 245 tp->af_specific = &tcp_sock_ipv6_mapped_specific; 246 #endif 247 248 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin)); 249 250 if (err) { 251 icsk->icsk_ext_hdr_len = exthdrlen; 252 icsk->icsk_af_ops = &ipv6_specific; 253 if (sk_is_mptcp(sk)) 254 mptcpv6_handle_mapped(sk, false); 255 sk->sk_backlog_rcv = tcp_v6_do_rcv; 256 #ifdef CONFIG_TCP_MD5SIG 257 tp->af_specific = &tcp_sock_ipv6_specific; 258 #endif 259 goto failure; 260 } 261 np->saddr = sk->sk_v6_rcv_saddr; 262 263 return err; 264 } 265 266 if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) 267 saddr = &sk->sk_v6_rcv_saddr; 268 269 fl6.flowi6_proto = IPPROTO_TCP; 270 fl6.daddr = sk->sk_v6_daddr; 271 fl6.saddr = saddr ? *saddr : np->saddr; 272 fl6.flowi6_oif = sk->sk_bound_dev_if; 273 fl6.flowi6_mark = sk->sk_mark; 274 fl6.fl6_dport = usin->sin6_port; 275 fl6.fl6_sport = inet->inet_sport; 276 fl6.flowi6_uid = sk->sk_uid; 277 278 opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk)); 279 final_p = fl6_update_dst(&fl6, opt, &final); 280 281 security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6)); 282 283 dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p); 284 if (IS_ERR(dst)) { 285 err = PTR_ERR(dst); 286 goto failure; 287 } 288 289 if (!saddr) { 290 saddr = &fl6.saddr; 291 sk->sk_v6_rcv_saddr = *saddr; 292 } 293 294 /* set the source address */ 295 np->saddr = *saddr; 296 inet->inet_rcv_saddr = LOOPBACK4_IPV6; 297 298 sk->sk_gso_type = SKB_GSO_TCPV6; 299 ip6_dst_store(sk, dst, NULL, NULL); 300 301 icsk->icsk_ext_hdr_len = 0; 302 if (opt) 303 icsk->icsk_ext_hdr_len = opt->opt_flen + 304 opt->opt_nflen; 305 306 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); 307 308 inet->inet_dport = usin->sin6_port; 309 310 tcp_set_state(sk, TCP_SYN_SENT); 311 tcp_death_row = sock_net(sk)->ipv4.tcp_death_row; 312 err = inet6_hash_connect(tcp_death_row, sk); 313 if (err) 314 goto late_failure; 315 316 sk_set_txhash(sk); 317 318 if (likely(!tp->repair)) { 319 if (!tp->write_seq) 320 WRITE_ONCE(tp->write_seq, 321 secure_tcpv6_seq(np->saddr.s6_addr32, 322 sk->sk_v6_daddr.s6_addr32, 323 inet->inet_sport, 324 inet->inet_dport)); 325 tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk), 326 np->saddr.s6_addr32, 327 sk->sk_v6_daddr.s6_addr32); 328 } 329 330 if (tcp_fastopen_defer_connect(sk, &err)) 331 return err; 332 if (err) 333 goto late_failure; 334 335 err = tcp_connect(sk); 336 if (err) 337 goto late_failure; 338 339 return 0; 340 341 late_failure: 
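	/* The handshake never completed: undo the TCP_SYN_SENT transition,
	 * then clear the destination port and cached route capabilities in
	 * the generic failure path below.
	 */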
	tcp_set_state(sk, TCP_CLOSE);
failure:
	inet->inet_dport = 0;
	sk->sk_route_caps = 0;
	return err;
}

static void tcp_v6_mtu_reduced(struct sock *sk)
{
	struct dst_entry *dst;
	u32 mtu;

	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
		return;

	mtu = READ_ONCE(tcp_sk(sk)->mtu_info);

	/* Drop requests trying to increase our current mss.
	 * Check done in __ip6_rt_update_pmtu() is too late.
	 */
	if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
		return;

	dst = inet6_csk_update_pmtu(sk, mtu);
	if (!dst)
		return;

	if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
		tcp_sync_mss(sk, dst_mtu(dst));
		tcp_simple_retransmit(sk);
	}
}

static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		u8 type, u8 code, int offset, __be32 info)
{
	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
	struct net *net = dev_net(skb->dev);
	struct request_sock *fastopen;
	struct ipv6_pinfo *np;
	struct tcp_sock *tp;
	__u32 seq, snd_una;
	struct sock *sk;
	bool fatal;
	int err;

	sk = __inet6_lookup_established(net, &tcp_hashinfo,
					&hdr->daddr, th->dest,
					&hdr->saddr, ntohs(th->source),
					skb->dev->ifindex, inet6_sdif(skb));

	if (!sk) {
		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
				  ICMP6_MIB_INERRORS);
		return -ENOENT;
	}

	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return 0;
	}
	seq = ntohl(th->seq);
	fatal = icmpv6_err_convert(type, code, &err);
	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		tcp_req_err(sk, seq, fatal);
		return 0;
	}

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (static_branch_unlikely(&ip6_min_hopcount)) {
		/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
		if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
			goto out;
		}
	}

	tp = tcp_sk(sk);
	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
	fastopen = rcu_dereference(tp->fastopen_rsk);
	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, snd_una, tp->snd_nxt)) {
		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = tcp_inet6_sk(sk);

	if (type == NDISC_REDIRECT) {
		if (!sock_owned_by_user(sk)) {
			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);

			if (dst)
				dst->ops->redirect(dst, sk, skb);
		}
		goto out;
	}

	if (type == ICMPV6_PKT_TOOBIG) {
		u32 mtu = ntohl(info);

		/* We are not interested in TCP_LISTEN and open_requests
		 * (SYN-ACKs sent out by Linux are always < 576 bytes, so
		 * they should go through unfragmented).
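		 *
		 * If the socket is owned by the user we cannot shrink the MSS
		 * here; the new value is parked in tp->mtu_info and
		 * TCP_MTU_REDUCED_DEFERRED is set so that release_sock() ends
		 * up calling tcp_v6_mtu_reduced() once the lock is dropped.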
		 */
		if (sk->sk_state == TCP_LISTEN)
			goto out;

		if (!ip6_sk_accept_pmtu(sk))
			goto out;

		if (mtu < IPV6_MIN_MTU)
			goto out;

		WRITE_ONCE(tp->mtu_info, mtu);

		if (!sock_owned_by_user(sk))
			tcp_v6_mtu_reduced(sk);
		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
					   &sk->sk_tsq_flags))
			sock_hold(sk);
		goto out;
	}


	/* Might be for a request_sock */
	switch (sk->sk_state) {
	case TCP_SYN_SENT:
	case TCP_SYN_RECV:
		/* Only in fast or simultaneous open. If a fast open socket is
		 * already accepted it is treated as a connected one below.
		 */
		if (fastopen && !fastopen->sk)
			break;

		ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);

		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;
			sk_error_report(sk);	/* Wake people up to see the error (see connect in sock.c) */

			tcp_done(sk);
		} else
			sk->sk_err_soft = err;
		goto out;
	case TCP_LISTEN:
		break;
	default:
		/* check if this ICMP message allows revert of backoff.
		 * (see RFC 6069)
		 */
		if (!fastopen && type == ICMPV6_DEST_UNREACH &&
		    code == ICMPV6_NOROUTE)
			tcp_ld_RTO_revert(sk, seq);
	}

	if (!sock_owned_by_user(sk) && np->recverr) {
		sk->sk_err = err;
		sk_error_report(sk);
	} else
		sk->sk_err_soft = err;

out:
	bh_unlock_sock(sk);
	sock_put(sk);
	return 0;
}


static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
			      struct flowi *fl,
			      struct request_sock *req,
			      struct tcp_fastopen_cookie *foc,
			      enum tcp_synack_type synack_type,
			      struct sk_buff *syn_skb)
{
	struct inet_request_sock *ireq = inet_rsk(req);
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct ipv6_txoptions *opt;
	struct flowi6 *fl6 = &fl->u.ip6;
	struct sk_buff *skb;
	int err = -ENOMEM;
	u8 tclass;

	/* First, grab a route. */
	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
					       IPPROTO_TCP)) == NULL)
		goto done;

	skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);

	if (skb) {
		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
				    &ireq->ir_v6_rmt_addr);

		fl6->daddr = ireq->ir_v6_rmt_addr;
		if (np->repflow && ireq->pktopts)
			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));

		tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
				(tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
				(np->tclass & INET_ECN_MASK) :
				np->tclass;

		if (!INET_ECN_is_capable(tclass) &&
		    tcp_bpf_ca_needs_ecn((struct sock *)req))
			tclass |= INET_ECN_ECT_0;

		rcu_read_lock();
		opt = ireq->ipv6_opt;
		if (!opt)
			opt = rcu_dereference(np->opt);
		err = ip6_xmit(sk, skb, fl6, skb->mark ?
: sk->sk_mark, opt, 563 tclass, sk->sk_priority); 564 rcu_read_unlock(); 565 err = net_xmit_eval(err); 566 } 567 568 done: 569 return err; 570 } 571 572 573 static void tcp_v6_reqsk_destructor(struct request_sock *req) 574 { 575 kfree(inet_rsk(req)->ipv6_opt); 576 consume_skb(inet_rsk(req)->pktopts); 577 } 578 579 #ifdef CONFIG_TCP_MD5SIG 580 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk, 581 const struct in6_addr *addr, 582 int l3index) 583 { 584 return tcp_md5_do_lookup(sk, l3index, 585 (union tcp_md5_addr *)addr, AF_INET6); 586 } 587 588 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk, 589 const struct sock *addr_sk) 590 { 591 int l3index; 592 593 l3index = l3mdev_master_ifindex_by_index(sock_net(sk), 594 addr_sk->sk_bound_dev_if); 595 return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr, 596 l3index); 597 } 598 599 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname, 600 sockptr_t optval, int optlen) 601 { 602 struct tcp_md5sig cmd; 603 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr; 604 int l3index = 0; 605 u8 prefixlen; 606 u8 flags; 607 608 if (optlen < sizeof(cmd)) 609 return -EINVAL; 610 611 if (copy_from_sockptr(&cmd, optval, sizeof(cmd))) 612 return -EFAULT; 613 614 if (sin6->sin6_family != AF_INET6) 615 return -EINVAL; 616 617 flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX; 618 619 if (optname == TCP_MD5SIG_EXT && 620 cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) { 621 prefixlen = cmd.tcpm_prefixlen; 622 if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) && 623 prefixlen > 32)) 624 return -EINVAL; 625 } else { 626 prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128; 627 } 628 629 if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex && 630 cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) { 631 struct net_device *dev; 632 633 rcu_read_lock(); 634 dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex); 635 if (dev && netif_is_l3_master(dev)) 636 l3index = dev->ifindex; 637 rcu_read_unlock(); 638 639 /* ok to reference set/not set outside of rcu; 640 * right now device MUST be an L3 master 641 */ 642 if (!dev || !l3index) 643 return -EINVAL; 644 } 645 646 if (!cmd.tcpm_keylen) { 647 if (ipv6_addr_v4mapped(&sin6->sin6_addr)) 648 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3], 649 AF_INET, prefixlen, 650 l3index, flags); 651 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr, 652 AF_INET6, prefixlen, l3index, flags); 653 } 654 655 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN) 656 return -EINVAL; 657 658 if (ipv6_addr_v4mapped(&sin6->sin6_addr)) 659 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3], 660 AF_INET, prefixlen, l3index, flags, 661 cmd.tcpm_key, cmd.tcpm_keylen, 662 GFP_KERNEL); 663 664 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr, 665 AF_INET6, prefixlen, l3index, flags, 666 cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL); 667 } 668 669 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp, 670 const struct in6_addr *daddr, 671 const struct in6_addr *saddr, 672 const struct tcphdr *th, int nbytes) 673 { 674 struct tcp6_pseudohdr *bp; 675 struct scatterlist sg; 676 struct tcphdr *_th; 677 678 bp = hp->scratch; 679 /* 1. 
TCP pseudo-header (RFC2460) */ 680 bp->saddr = *saddr; 681 bp->daddr = *daddr; 682 bp->protocol = cpu_to_be32(IPPROTO_TCP); 683 bp->len = cpu_to_be32(nbytes); 684 685 _th = (struct tcphdr *)(bp + 1); 686 memcpy(_th, th, sizeof(*th)); 687 _th->check = 0; 688 689 sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th)); 690 ahash_request_set_crypt(hp->md5_req, &sg, NULL, 691 sizeof(*bp) + sizeof(*th)); 692 return crypto_ahash_update(hp->md5_req); 693 } 694 695 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, 696 const struct in6_addr *daddr, struct in6_addr *saddr, 697 const struct tcphdr *th) 698 { 699 struct tcp_md5sig_pool *hp; 700 struct ahash_request *req; 701 702 hp = tcp_get_md5sig_pool(); 703 if (!hp) 704 goto clear_hash_noput; 705 req = hp->md5_req; 706 707 if (crypto_ahash_init(req)) 708 goto clear_hash; 709 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2)) 710 goto clear_hash; 711 if (tcp_md5_hash_key(hp, key)) 712 goto clear_hash; 713 ahash_request_set_crypt(req, NULL, md5_hash, 0); 714 if (crypto_ahash_final(req)) 715 goto clear_hash; 716 717 tcp_put_md5sig_pool(); 718 return 0; 719 720 clear_hash: 721 tcp_put_md5sig_pool(); 722 clear_hash_noput: 723 memset(md5_hash, 0, 16); 724 return 1; 725 } 726 727 static int tcp_v6_md5_hash_skb(char *md5_hash, 728 const struct tcp_md5sig_key *key, 729 const struct sock *sk, 730 const struct sk_buff *skb) 731 { 732 const struct in6_addr *saddr, *daddr; 733 struct tcp_md5sig_pool *hp; 734 struct ahash_request *req; 735 const struct tcphdr *th = tcp_hdr(skb); 736 737 if (sk) { /* valid for establish/request sockets */ 738 saddr = &sk->sk_v6_rcv_saddr; 739 daddr = &sk->sk_v6_daddr; 740 } else { 741 const struct ipv6hdr *ip6h = ipv6_hdr(skb); 742 saddr = &ip6h->saddr; 743 daddr = &ip6h->daddr; 744 } 745 746 hp = tcp_get_md5sig_pool(); 747 if (!hp) 748 goto clear_hash_noput; 749 req = hp->md5_req; 750 751 if (crypto_ahash_init(req)) 752 goto clear_hash; 753 754 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len)) 755 goto clear_hash; 756 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2)) 757 goto clear_hash; 758 if (tcp_md5_hash_key(hp, key)) 759 goto clear_hash; 760 ahash_request_set_crypt(req, NULL, md5_hash, 0); 761 if (crypto_ahash_final(req)) 762 goto clear_hash; 763 764 tcp_put_md5sig_pool(); 765 return 0; 766 767 clear_hash: 768 tcp_put_md5sig_pool(); 769 clear_hash_noput: 770 memset(md5_hash, 0, 16); 771 return 1; 772 } 773 774 #endif 775 776 static void tcp_v6_init_req(struct request_sock *req, 777 const struct sock *sk_listener, 778 struct sk_buff *skb) 779 { 780 bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags); 781 struct inet_request_sock *ireq = inet_rsk(req); 782 const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener); 783 784 ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; 785 ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; 786 787 /* So that link locals have meaning */ 788 if ((!sk_listener->sk_bound_dev_if || l3_slave) && 789 ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) 790 ireq->ir_iif = tcp_v6_iif(skb); 791 792 if (!TCP_SKB_CB(skb)->tcp_tw_isn && 793 (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) || 794 np->rxopt.bits.rxinfo || 795 np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || 796 np->rxopt.bits.rxohlim || np->repflow)) { 797 refcount_inc(&skb->users); 798 ireq->pktopts = skb; 799 } 800 } 801 802 static struct dst_entry *tcp_v6_route_req(const struct sock *sk, 803 struct sk_buff *skb, 804 struct flowi *fl, 805 struct request_sock 
*req) 806 { 807 tcp_v6_init_req(req, sk, skb); 808 809 if (security_inet_conn_request(sk, skb, req)) 810 return NULL; 811 812 return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP); 813 } 814 815 struct request_sock_ops tcp6_request_sock_ops __read_mostly = { 816 .family = AF_INET6, 817 .obj_size = sizeof(struct tcp6_request_sock), 818 .rtx_syn_ack = tcp_rtx_synack, 819 .send_ack = tcp_v6_reqsk_send_ack, 820 .destructor = tcp_v6_reqsk_destructor, 821 .send_reset = tcp_v6_send_reset, 822 .syn_ack_timeout = tcp_syn_ack_timeout, 823 }; 824 825 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { 826 .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - 827 sizeof(struct ipv6hdr), 828 #ifdef CONFIG_TCP_MD5SIG 829 .req_md5_lookup = tcp_v6_md5_lookup, 830 .calc_md5_hash = tcp_v6_md5_hash_skb, 831 #endif 832 #ifdef CONFIG_SYN_COOKIES 833 .cookie_init_seq = cookie_v6_init_sequence, 834 #endif 835 .route_req = tcp_v6_route_req, 836 .init_seq = tcp_v6_init_seq, 837 .init_ts_off = tcp_v6_init_ts_off, 838 .send_synack = tcp_v6_send_synack, 839 }; 840 841 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq, 842 u32 ack, u32 win, u32 tsval, u32 tsecr, 843 int oif, struct tcp_md5sig_key *key, int rst, 844 u8 tclass, __be32 label, u32 priority) 845 { 846 const struct tcphdr *th = tcp_hdr(skb); 847 struct tcphdr *t1; 848 struct sk_buff *buff; 849 struct flowi6 fl6; 850 struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev); 851 struct sock *ctl_sk = net->ipv6.tcp_sk; 852 unsigned int tot_len = sizeof(struct tcphdr); 853 __be32 mrst = 0, *topt; 854 struct dst_entry *dst; 855 __u32 mark = 0; 856 857 if (tsecr) 858 tot_len += TCPOLEN_TSTAMP_ALIGNED; 859 #ifdef CONFIG_TCP_MD5SIG 860 if (key) 861 tot_len += TCPOLEN_MD5SIG_ALIGNED; 862 #endif 863 864 #ifdef CONFIG_MPTCP 865 if (rst && !key) { 866 mrst = mptcp_reset_option(skb); 867 868 if (mrst) 869 tot_len += sizeof(__be32); 870 } 871 #endif 872 873 buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC); 874 if (!buff) 875 return; 876 877 skb_reserve(buff, MAX_TCP_HEADER); 878 879 t1 = skb_push(buff, tot_len); 880 skb_reset_transport_header(buff); 881 882 /* Swap the send and the receive. 
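	 * The reply below is built from the incoming segment: addresses and
	 * ports are mirrored, the header is rebuilt from scratch, and any
	 * timestamp, MD5 or MPTCP-reset options requested by the caller are
	 * appended after the fixed header.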
*/ 883 memset(t1, 0, sizeof(*t1)); 884 t1->dest = th->source; 885 t1->source = th->dest; 886 t1->doff = tot_len / 4; 887 t1->seq = htonl(seq); 888 t1->ack_seq = htonl(ack); 889 t1->ack = !rst || !th->ack; 890 t1->rst = rst; 891 t1->window = htons(win); 892 893 topt = (__be32 *)(t1 + 1); 894 895 if (tsecr) { 896 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | 897 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); 898 *topt++ = htonl(tsval); 899 *topt++ = htonl(tsecr); 900 } 901 902 if (mrst) 903 *topt++ = mrst; 904 905 #ifdef CONFIG_TCP_MD5SIG 906 if (key) { 907 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | 908 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG); 909 tcp_v6_md5_hash_hdr((__u8 *)topt, key, 910 &ipv6_hdr(skb)->saddr, 911 &ipv6_hdr(skb)->daddr, t1); 912 } 913 #endif 914 915 memset(&fl6, 0, sizeof(fl6)); 916 fl6.daddr = ipv6_hdr(skb)->saddr; 917 fl6.saddr = ipv6_hdr(skb)->daddr; 918 fl6.flowlabel = label; 919 920 buff->ip_summed = CHECKSUM_PARTIAL; 921 922 __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr); 923 924 fl6.flowi6_proto = IPPROTO_TCP; 925 if (rt6_need_strict(&fl6.daddr) && !oif) 926 fl6.flowi6_oif = tcp_v6_iif(skb); 927 else { 928 if (!oif && netif_index_is_l3_master(net, skb->skb_iif)) 929 oif = skb->skb_iif; 930 931 fl6.flowi6_oif = oif; 932 } 933 934 if (sk) { 935 if (sk->sk_state == TCP_TIME_WAIT) { 936 mark = inet_twsk(sk)->tw_mark; 937 /* autoflowlabel relies on buff->hash */ 938 skb_set_hash(buff, inet_twsk(sk)->tw_txhash, 939 PKT_HASH_TYPE_L4); 940 } else { 941 mark = sk->sk_mark; 942 } 943 skb_set_delivery_time(buff, tcp_transmit_time(sk), true); 944 } 945 fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark; 946 fl6.fl6_dport = t1->dest; 947 fl6.fl6_sport = t1->source; 948 fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL); 949 security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6)); 950 951 /* Pass a socket to ip6_dst_lookup either it is for RST 952 * Underlying function will use this to retrieve the network 953 * namespace 954 */ 955 dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL); 956 if (!IS_ERR(dst)) { 957 skb_dst_set(buff, dst); 958 ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, 959 tclass & ~INET_ECN_MASK, priority); 960 TCP_INC_STATS(net, TCP_MIB_OUTSEGS); 961 if (rst) 962 TCP_INC_STATS(net, TCP_MIB_OUTRSTS); 963 return; 964 } 965 966 kfree_skb(buff); 967 } 968 969 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) 970 { 971 const struct tcphdr *th = tcp_hdr(skb); 972 struct ipv6hdr *ipv6h = ipv6_hdr(skb); 973 u32 seq = 0, ack_seq = 0; 974 struct tcp_md5sig_key *key = NULL; 975 #ifdef CONFIG_TCP_MD5SIG 976 const __u8 *hash_location = NULL; 977 unsigned char newhash[16]; 978 int genhash; 979 struct sock *sk1 = NULL; 980 #endif 981 __be32 label = 0; 982 u32 priority = 0; 983 struct net *net; 984 int oif = 0; 985 986 if (th->rst) 987 return; 988 989 /* If sk not NULL, it means we did a successful lookup and incoming 990 * route had to be correct. prequeue might have dropped our dst. 991 */ 992 if (!sk && !ipv6_unicast_destination(skb)) 993 return; 994 995 net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev); 996 #ifdef CONFIG_TCP_MD5SIG 997 rcu_read_lock(); 998 hash_location = tcp_parse_md5sig_option(th); 999 if (sk && sk_fullsock(sk)) { 1000 int l3index; 1001 1002 /* sdif set, means packet ingressed via a device 1003 * in an L3 domain and inet_iif is set to it. 1004 */ 1005 l3index = tcp_v6_sdif(skb) ? 
			  tcp_v6_iif_l3_slave(skb) : 0;
		key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
	} else if (hash_location) {
		int dif = tcp_v6_iif_l3_slave(skb);
		int sdif = tcp_v6_sdif(skb);
		int l3index;

		/*
		 * The active side of the connection is gone, so try to find
		 * the listening socket via the source port, and then look up
		 * the md5 key through that socket. This does not weaken
		 * security: the incoming packet is checked against the md5
		 * hash of the key we find, and no RST is generated if the
		 * hash does not match.
		 */
		sk1 = inet6_lookup_listener(net,
					   &tcp_hashinfo, NULL, 0,
					   &ipv6h->saddr,
					   th->source, &ipv6h->daddr,
					   ntohs(th->source), dif, sdif);
		if (!sk1)
			goto out;

		/* sdif set, means packet ingressed via a device
		 * in an L3 domain and dif is set to it.
		 */
		l3index = tcp_v6_sdif(skb) ? dif : 0;

		key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
		if (!key)
			goto out;

		genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
		if (genhash || memcmp(hash_location, newhash, 16) != 0)
			goto out;
	}
#endif

	if (th->ack)
		seq = ntohl(th->ack_seq);
	else
		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
			  (th->doff << 2);

	if (sk) {
		oif = sk->sk_bound_dev_if;
		if (sk_fullsock(sk)) {
			const struct ipv6_pinfo *np = tcp_inet6_sk(sk);

			trace_tcp_send_reset(sk, skb);
			if (np->repflow)
				label = ip6_flowlabel(ipv6h);
			priority = sk->sk_priority;
		}
		if (sk->sk_state == TCP_TIME_WAIT) {
			label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
			priority = inet_twsk(sk)->tw_priority;
		}
	} else {
		if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
			label = ip6_flowlabel(ipv6h);
	}

	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
			     ipv6_get_dsfield(ipv6h), label, priority);

#ifdef CONFIG_TCP_MD5SIG
out:
	rcu_read_unlock();
#endif
}

static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
			    u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
			    struct tcp_md5sig_key *key, u8 tclass,
			    __be32 label, u32 priority)
{
	tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
			     tclass, label, priority);
}

static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcp_time_stamp_raw() + tcptw->tw_ts_offset,
			tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
			tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority);

	inet_twsk_put(tw);
}

static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
{
	int l3index;

	l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;

	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
	 */
	/* RFC 7323 2.3
	 * The window field (SEG.WND) of every outgoing segment, with the
	 * exception of <SYN> segments, MUST be right-shifted by
	 * Rcv.Wind.Shift bits:
	 */
	tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
1115 tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, 1116 tcp_rsk(req)->rcv_nxt, 1117 req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, 1118 tcp_time_stamp_raw() + tcp_rsk(req)->ts_off, 1119 req->ts_recent, sk->sk_bound_dev_if, 1120 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index), 1121 ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority); 1122 } 1123 1124 1125 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb) 1126 { 1127 #ifdef CONFIG_SYN_COOKIES 1128 const struct tcphdr *th = tcp_hdr(skb); 1129 1130 if (!th->syn) 1131 sk = cookie_v6_check(sk, skb); 1132 #endif 1133 return sk; 1134 } 1135 1136 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph, 1137 struct tcphdr *th, u32 *cookie) 1138 { 1139 u16 mss = 0; 1140 #ifdef CONFIG_SYN_COOKIES 1141 mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops, 1142 &tcp_request_sock_ipv6_ops, sk, th); 1143 if (mss) { 1144 *cookie = __cookie_v6_init_sequence(iph, th, &mss); 1145 tcp_synq_overflow(sk); 1146 } 1147 #endif 1148 return mss; 1149 } 1150 1151 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) 1152 { 1153 if (skb->protocol == htons(ETH_P_IP)) 1154 return tcp_v4_conn_request(sk, skb); 1155 1156 if (!ipv6_unicast_destination(skb)) 1157 goto drop; 1158 1159 if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) { 1160 __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS); 1161 return 0; 1162 } 1163 1164 return tcp_conn_request(&tcp6_request_sock_ops, 1165 &tcp_request_sock_ipv6_ops, sk, skb); 1166 1167 drop: 1168 tcp_listendrop(sk); 1169 return 0; /* don't send reset */ 1170 } 1171 1172 static void tcp_v6_restore_cb(struct sk_buff *skb) 1173 { 1174 /* We need to move header back to the beginning if xfrm6_policy_check() 1175 * and tcp_v6_fill_cb() are going to be called again. 1176 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there. 
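	 * This is simply the inverse of tcp_v6_fill_cb(), which moved IP6CB()
	 * into TCP_SKB_CB() once the xfrm policy check had been done.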
1177 */ 1178 memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6, 1179 sizeof(struct inet6_skb_parm)); 1180 } 1181 1182 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, 1183 struct request_sock *req, 1184 struct dst_entry *dst, 1185 struct request_sock *req_unhash, 1186 bool *own_req) 1187 { 1188 struct inet_request_sock *ireq; 1189 struct ipv6_pinfo *newnp; 1190 const struct ipv6_pinfo *np = tcp_inet6_sk(sk); 1191 struct ipv6_txoptions *opt; 1192 struct inet_sock *newinet; 1193 bool found_dup_sk = false; 1194 struct tcp_sock *newtp; 1195 struct sock *newsk; 1196 #ifdef CONFIG_TCP_MD5SIG 1197 struct tcp_md5sig_key *key; 1198 int l3index; 1199 #endif 1200 struct flowi6 fl6; 1201 1202 if (skb->protocol == htons(ETH_P_IP)) { 1203 /* 1204 * v6 mapped 1205 */ 1206 1207 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst, 1208 req_unhash, own_req); 1209 1210 if (!newsk) 1211 return NULL; 1212 1213 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk); 1214 1215 newnp = tcp_inet6_sk(newsk); 1216 newtp = tcp_sk(newsk); 1217 1218 memcpy(newnp, np, sizeof(struct ipv6_pinfo)); 1219 1220 newnp->saddr = newsk->sk_v6_rcv_saddr; 1221 1222 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped; 1223 if (sk_is_mptcp(newsk)) 1224 mptcpv6_handle_mapped(newsk, true); 1225 newsk->sk_backlog_rcv = tcp_v4_do_rcv; 1226 #ifdef CONFIG_TCP_MD5SIG 1227 newtp->af_specific = &tcp_sock_ipv6_mapped_specific; 1228 #endif 1229 1230 newnp->ipv6_mc_list = NULL; 1231 newnp->ipv6_ac_list = NULL; 1232 newnp->ipv6_fl_list = NULL; 1233 newnp->pktoptions = NULL; 1234 newnp->opt = NULL; 1235 newnp->mcast_oif = inet_iif(skb); 1236 newnp->mcast_hops = ip_hdr(skb)->ttl; 1237 newnp->rcv_flowinfo = 0; 1238 if (np->repflow) 1239 newnp->flow_label = 0; 1240 1241 /* 1242 * No need to charge this sock to the relevant IPv6 refcnt debug socks count 1243 * here, tcp_create_openreq_child now does this for us, see the comment in 1244 * that function for the gory details. -acme 1245 */ 1246 1247 /* It is tricky place. Until this moment IPv4 tcp 1248 worked with IPv6 icsk.icsk_af_ops. 1249 Sync it now. 1250 */ 1251 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie); 1252 1253 return newsk; 1254 } 1255 1256 ireq = inet_rsk(req); 1257 1258 if (sk_acceptq_is_full(sk)) 1259 goto out_overflow; 1260 1261 if (!dst) { 1262 dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP); 1263 if (!dst) 1264 goto out; 1265 } 1266 1267 newsk = tcp_create_openreq_child(sk, req, skb); 1268 if (!newsk) 1269 goto out_nonewsk; 1270 1271 /* 1272 * No need to charge this sock to the relevant IPv6 refcnt debug socks 1273 * count here, tcp_create_openreq_child now does this for us, see the 1274 * comment in that function for the gory details. -acme 1275 */ 1276 1277 newsk->sk_gso_type = SKB_GSO_TCPV6; 1278 ip6_dst_store(newsk, dst, NULL, NULL); 1279 inet6_sk_rx_dst_set(newsk, skb); 1280 1281 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk); 1282 1283 newtp = tcp_sk(newsk); 1284 newinet = inet_sk(newsk); 1285 newnp = tcp_inet6_sk(newsk); 1286 1287 memcpy(newnp, np, sizeof(struct ipv6_pinfo)); 1288 1289 newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr; 1290 newnp->saddr = ireq->ir_v6_loc_addr; 1291 newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr; 1292 newsk->sk_bound_dev_if = ireq->ir_iif; 1293 1294 /* Now IPv6 options... 1295 1296 First: no IPv4 options. 
	 */
	newinet->inet_opt = NULL;
	newnp->ipv6_mc_list = NULL;
	newnp->ipv6_ac_list = NULL;
	newnp->ipv6_fl_list = NULL;

	/* Clone RX bits */
	newnp->rxopt.all = np->rxopt.all;

	newnp->pktoptions = NULL;
	newnp->opt	  = NULL;
	newnp->mcast_oif  = tcp_v6_iif(skb);
	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
	newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
	if (np->repflow)
		newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));

	/* Set ToS of the new socket based upon the value of incoming SYN.
	 * ECT bits are set later in tcp_init_transfer().
	 */
	if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)
		newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;

	/* Clone native IPv6 options from listening socket (if any)

	   Yes, keeping a reference count would be much more clever,
	   but we do one more thing here: reattach optmem
	   to newsk.
	 */
	opt = ireq->ipv6_opt;
	if (!opt)
		opt = rcu_dereference(np->opt);
	if (opt) {
		opt = ipv6_dup_options(newsk, opt);
		RCU_INIT_POINTER(newnp->opt, opt);
	}
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (opt)
		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
						    opt->opt_flen;

	tcp_ca_openreq_child(newsk, dst);

	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));

	tcp_initialize_rcv_mss(newsk);

	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;

#ifdef CONFIG_TCP_MD5SIG
	l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);

	/* Copy over the MD5 key from the original socket */
	key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
	if (key) {
		/* We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
			       AF_INET6, 128, l3index, key->flags, key->key, key->keylen,
			       sk_gfp_mask(sk, GFP_ATOMIC));
	}
#endif

	if (__inet_inherit_port(sk, newsk) < 0) {
		inet_csk_prepare_forced_close(newsk);
		tcp_done(newsk);
		goto out;
	}
	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
				       &found_dup_sk);
	if (*own_req) {
		tcp_move_syn(newtp, req);

		/* Clone pktoptions received with SYN, if we own the req */
		if (ireq->pktopts) {
			newnp->pktoptions = skb_clone(ireq->pktopts,
						      sk_gfp_mask(sk, GFP_ATOMIC));
			consume_skb(ireq->pktopts);
			ireq->pktopts = NULL;
			if (newnp->pktoptions) {
				tcp_v6_restore_cb(newnp->pktoptions);
				skb_set_owner_r(newnp->pktoptions, newsk);
			}
		}
	} else {
		if (!req_unhash && found_dup_sk) {
			/* This code path should only be executed in the
			 * syncookie case
			 */
			bh_unlock_sock(newsk);
			sock_put(newsk);
			newsk = NULL;
		}
	}

	return newsk;

out_overflow:
	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
out_nonewsk:
	dst_release(dst);
out:
	tcp_listendrop(sk);
	return NULL;
}

INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
							    u32));
/* The socket must have its spinlock held when we get
 * here, unless it is a TCP_LISTEN socket.
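 * (On the normal receive path, tcp_v6_rcv() takes that lock with
 * bh_lock_sock_nested() before calling us.)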
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
INDIRECT_CALLABLE_SCOPE
int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct sk_buff *opt_skb = NULL;
	enum skb_drop_reason reason;
	struct tcp_sock *tp;

	/* Imagine: socket is IPv6. IPv4 packet arrives,
	   goes to IPv4 receive handler and backlogged.
	   From backlog it always goes here. Kerboom...
	   Fortunately, tcp_rcv_established and rcv_established
	   handle them correctly, but it is not the case with
	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
	 */

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_do_rcv(sk, skb);

	/*
	 *	socket locking is here for SMP purposes as backlog rcv
	 *	is currently called with bh processing disabled.
	 */

	/* Do Stevens' IPV6_PKTOPTIONS.

	   Yes, guys, it is the only place in our code where we
	   can do this without affecting IPv4.
	   The rest of the code is protocol independent,
	   and I do not like the idea of uglifying IPv4.

	   Actually, the whole idea behind IPV6_PKTOPTIONS
	   does not look very well thought out. For now we latch the
	   options received in the last packet enqueued by tcp.
	   Feel free to propose a better solution.
					       --ANK (980728)
	 */
	if (np->rxopt.all)
		opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));

	reason = SKB_DROP_REASON_NOT_SPECIFIED;
	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		struct dst_entry *dst;

		dst = rcu_dereference_protected(sk->sk_rx_dst,
						lockdep_sock_is_held(sk));

		sock_rps_save_rxhash(sk, skb);
		sk_mark_napi_id(sk, skb);
		if (dst) {
			if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
			    INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
					    dst, sk->sk_rx_dst_cookie) == NULL) {
				RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
				dst_release(dst);
			}
		}

		tcp_rcv_established(sk, skb);
		if (opt_skb)
			goto ipv6_pktoptions;
		return 0;
	}

	if (tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v6_cookie_check(sk, skb);

		if (!nsk)
			goto discard;

		if (nsk != sk) {
			if (tcp_child_process(sk, nsk, skb))
				goto reset;
			if (opt_skb)
				__kfree_skb(opt_skb);
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb);

	if (tcp_rcv_state_process(sk, skb))
		goto reset;
	if (opt_skb)
		goto ipv6_pktoptions;
	return 0;

reset:
	tcp_v6_send_reset(sk, skb);
discard:
	if (opt_skb)
		__kfree_skb(opt_skb);
	kfree_skb_reason(skb, reason);
	return 0;
csum_err:
	reason = SKB_DROP_REASON_TCP_CSUM;
	trace_tcp_bad_csum(skb);
	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
	goto discard;


ipv6_pktoptions:
	/* You may ask, what is this about?

	   1. skb was enqueued by tcp.
	   2. skb is added to tail of read queue, rather than out of order.
	   3. socket is not in passive state.
	   4. Finally, it really contains options, which user wants to receive.
1529 */ 1530 tp = tcp_sk(sk); 1531 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt && 1532 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) { 1533 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo) 1534 np->mcast_oif = tcp_v6_iif(opt_skb); 1535 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) 1536 np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit; 1537 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass) 1538 np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb)); 1539 if (np->repflow) 1540 np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb)); 1541 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) { 1542 skb_set_owner_r(opt_skb, sk); 1543 tcp_v6_restore_cb(opt_skb); 1544 opt_skb = xchg(&np->pktoptions, opt_skb); 1545 } else { 1546 __kfree_skb(opt_skb); 1547 opt_skb = xchg(&np->pktoptions, NULL); 1548 } 1549 } 1550 1551 consume_skb(opt_skb); 1552 return 0; 1553 } 1554 1555 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr, 1556 const struct tcphdr *th) 1557 { 1558 /* This is tricky: we move IP6CB at its correct location into 1559 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because 1560 * _decode_session6() uses IP6CB(). 1561 * barrier() makes sure compiler won't play aliasing games. 1562 */ 1563 memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb), 1564 sizeof(struct inet6_skb_parm)); 1565 barrier(); 1566 1567 TCP_SKB_CB(skb)->seq = ntohl(th->seq); 1568 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + 1569 skb->len - th->doff*4); 1570 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); 1571 TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th); 1572 TCP_SKB_CB(skb)->tcp_tw_isn = 0; 1573 TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr); 1574 TCP_SKB_CB(skb)->sacked = 0; 1575 TCP_SKB_CB(skb)->has_rxtstamp = 1576 skb->tstamp || skb_hwtstamps(skb)->hwtstamp; 1577 } 1578 1579 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) 1580 { 1581 enum skb_drop_reason drop_reason; 1582 int sdif = inet6_sdif(skb); 1583 int dif = inet6_iif(skb); 1584 const struct tcphdr *th; 1585 const struct ipv6hdr *hdr; 1586 bool refcounted; 1587 struct sock *sk; 1588 int ret; 1589 struct net *net = dev_net(skb->dev); 1590 1591 drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; 1592 if (skb->pkt_type != PACKET_HOST) 1593 goto discard_it; 1594 1595 /* 1596 * Count it even if it's bad. 
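	 *
	 *	The fast path below pulls the TCP header, validates the data
	 *	offset, sets up pseudo-header checksum conversion and then
	 *	looks the segment up in the established/listener hash tables.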
1597 */ 1598 __TCP_INC_STATS(net, TCP_MIB_INSEGS); 1599 1600 if (!pskb_may_pull(skb, sizeof(struct tcphdr))) 1601 goto discard_it; 1602 1603 th = (const struct tcphdr *)skb->data; 1604 1605 if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) { 1606 drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL; 1607 goto bad_packet; 1608 } 1609 if (!pskb_may_pull(skb, th->doff*4)) 1610 goto discard_it; 1611 1612 if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo)) 1613 goto csum_error; 1614 1615 th = (const struct tcphdr *)skb->data; 1616 hdr = ipv6_hdr(skb); 1617 1618 lookup: 1619 sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), 1620 th->source, th->dest, inet6_iif(skb), sdif, 1621 &refcounted); 1622 if (!sk) 1623 goto no_tcp_socket; 1624 1625 process: 1626 if (sk->sk_state == TCP_TIME_WAIT) 1627 goto do_time_wait; 1628 1629 if (sk->sk_state == TCP_NEW_SYN_RECV) { 1630 struct request_sock *req = inet_reqsk(sk); 1631 bool req_stolen = false; 1632 struct sock *nsk; 1633 1634 sk = req->rsk_listener; 1635 drop_reason = tcp_inbound_md5_hash(sk, skb, 1636 &hdr->saddr, &hdr->daddr, 1637 AF_INET6, dif, sdif); 1638 if (drop_reason) { 1639 sk_drops_add(sk, skb); 1640 reqsk_put(req); 1641 goto discard_it; 1642 } 1643 if (tcp_checksum_complete(skb)) { 1644 reqsk_put(req); 1645 goto csum_error; 1646 } 1647 if (unlikely(sk->sk_state != TCP_LISTEN)) { 1648 nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb); 1649 if (!nsk) { 1650 inet_csk_reqsk_queue_drop_and_put(sk, req); 1651 goto lookup; 1652 } 1653 sk = nsk; 1654 /* reuseport_migrate_sock() has already held one sk_refcnt 1655 * before returning. 1656 */ 1657 } else { 1658 sock_hold(sk); 1659 } 1660 refcounted = true; 1661 nsk = NULL; 1662 if (!tcp_filter(sk, skb)) { 1663 th = (const struct tcphdr *)skb->data; 1664 hdr = ipv6_hdr(skb); 1665 tcp_v6_fill_cb(skb, hdr, th); 1666 nsk = tcp_check_req(sk, skb, req, false, &req_stolen); 1667 } else { 1668 drop_reason = SKB_DROP_REASON_SOCKET_FILTER; 1669 } 1670 if (!nsk) { 1671 reqsk_put(req); 1672 if (req_stolen) { 1673 /* Another cpu got exclusive access to req 1674 * and created a full blown socket. 1675 * Try to feed this packet to this socket 1676 * instead of discarding it. 
1677 */ 1678 tcp_v6_restore_cb(skb); 1679 sock_put(sk); 1680 goto lookup; 1681 } 1682 goto discard_and_relse; 1683 } 1684 if (nsk == sk) { 1685 reqsk_put(req); 1686 tcp_v6_restore_cb(skb); 1687 } else if (tcp_child_process(sk, nsk, skb)) { 1688 tcp_v6_send_reset(nsk, skb); 1689 goto discard_and_relse; 1690 } else { 1691 sock_put(sk); 1692 return 0; 1693 } 1694 } 1695 1696 if (static_branch_unlikely(&ip6_min_hopcount)) { 1697 /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */ 1698 if (hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) { 1699 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); 1700 goto discard_and_relse; 1701 } 1702 } 1703 1704 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) { 1705 drop_reason = SKB_DROP_REASON_XFRM_POLICY; 1706 goto discard_and_relse; 1707 } 1708 1709 drop_reason = tcp_inbound_md5_hash(sk, skb, &hdr->saddr, &hdr->daddr, 1710 AF_INET6, dif, sdif); 1711 if (drop_reason) 1712 goto discard_and_relse; 1713 1714 if (tcp_filter(sk, skb)) { 1715 drop_reason = SKB_DROP_REASON_SOCKET_FILTER; 1716 goto discard_and_relse; 1717 } 1718 th = (const struct tcphdr *)skb->data; 1719 hdr = ipv6_hdr(skb); 1720 tcp_v6_fill_cb(skb, hdr, th); 1721 1722 skb->dev = NULL; 1723 1724 if (sk->sk_state == TCP_LISTEN) { 1725 ret = tcp_v6_do_rcv(sk, skb); 1726 goto put_and_return; 1727 } 1728 1729 sk_incoming_cpu_update(sk); 1730 1731 bh_lock_sock_nested(sk); 1732 tcp_segs_in(tcp_sk(sk), skb); 1733 ret = 0; 1734 if (!sock_owned_by_user(sk)) { 1735 ret = tcp_v6_do_rcv(sk, skb); 1736 } else { 1737 if (tcp_add_backlog(sk, skb, &drop_reason)) 1738 goto discard_and_relse; 1739 } 1740 bh_unlock_sock(sk); 1741 put_and_return: 1742 if (refcounted) 1743 sock_put(sk); 1744 return ret ? -1 : 0; 1745 1746 no_tcp_socket: 1747 drop_reason = SKB_DROP_REASON_NO_SOCKET; 1748 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) 1749 goto discard_it; 1750 1751 tcp_v6_fill_cb(skb, hdr, th); 1752 1753 if (tcp_checksum_complete(skb)) { 1754 csum_error: 1755 drop_reason = SKB_DROP_REASON_TCP_CSUM; 1756 trace_tcp_bad_csum(skb); 1757 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS); 1758 bad_packet: 1759 __TCP_INC_STATS(net, TCP_MIB_INERRS); 1760 } else { 1761 tcp_v6_send_reset(NULL, skb); 1762 } 1763 1764 discard_it: 1765 SKB_DR_OR(drop_reason, NOT_SPECIFIED); 1766 kfree_skb_reason(skb, drop_reason); 1767 return 0; 1768 1769 discard_and_relse: 1770 sk_drops_add(sk, skb); 1771 if (refcounted) 1772 sock_put(sk); 1773 goto discard_it; 1774 1775 do_time_wait: 1776 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { 1777 drop_reason = SKB_DROP_REASON_XFRM_POLICY; 1778 inet_twsk_put(inet_twsk(sk)); 1779 goto discard_it; 1780 } 1781 1782 tcp_v6_fill_cb(skb, hdr, th); 1783 1784 if (tcp_checksum_complete(skb)) { 1785 inet_twsk_put(inet_twsk(sk)); 1786 goto csum_error; 1787 } 1788 1789 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { 1790 case TCP_TW_SYN: 1791 { 1792 struct sock *sk2; 1793 1794 sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo, 1795 skb, __tcp_hdrlen(th), 1796 &ipv6_hdr(skb)->saddr, th->source, 1797 &ipv6_hdr(skb)->daddr, 1798 ntohs(th->dest), 1799 tcp_v6_iif_l3_slave(skb), 1800 sdif); 1801 if (sk2) { 1802 struct inet_timewait_sock *tw = inet_twsk(sk); 1803 inet_twsk_deschedule_put(tw); 1804 sk = sk2; 1805 tcp_v6_restore_cb(skb); 1806 refcounted = false; 1807 goto process; 1808 } 1809 } 1810 /* to ACK */ 1811 fallthrough; 1812 case TCP_TW_ACK: 1813 tcp_v6_timewait_ack(sk, skb); 1814 break; 1815 case TCP_TW_RST: 1816 tcp_v6_send_reset(sk, skb); 1817 
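		/* We answered with a reset above; retire the timewait entry
		 * immediately rather than waiting for its timer.
		 */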
inet_twsk_deschedule_put(inet_twsk(sk)); 1818 goto discard_it; 1819 case TCP_TW_SUCCESS: 1820 ; 1821 } 1822 goto discard_it; 1823 } 1824 1825 INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb) 1826 { 1827 const struct ipv6hdr *hdr; 1828 const struct tcphdr *th; 1829 struct sock *sk; 1830 1831 if (skb->pkt_type != PACKET_HOST) 1832 return; 1833 1834 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr))) 1835 return; 1836 1837 hdr = ipv6_hdr(skb); 1838 th = tcp_hdr(skb); 1839 1840 if (th->doff < sizeof(struct tcphdr) / 4) 1841 return; 1842 1843 /* Note : We use inet6_iif() here, not tcp_v6_iif() */ 1844 sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo, 1845 &hdr->saddr, th->source, 1846 &hdr->daddr, ntohs(th->dest), 1847 inet6_iif(skb), inet6_sdif(skb)); 1848 if (sk) { 1849 skb->sk = sk; 1850 skb->destructor = sock_edemux; 1851 if (sk_fullsock(sk)) { 1852 struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst); 1853 1854 if (dst) 1855 dst = dst_check(dst, sk->sk_rx_dst_cookie); 1856 if (dst && 1857 sk->sk_rx_dst_ifindex == skb->skb_iif) 1858 skb_dst_set_noref(skb, dst); 1859 } 1860 } 1861 } 1862 1863 static struct timewait_sock_ops tcp6_timewait_sock_ops = { 1864 .twsk_obj_size = sizeof(struct tcp6_timewait_sock), 1865 .twsk_unique = tcp_twsk_unique, 1866 .twsk_destructor = tcp_twsk_destructor, 1867 }; 1868 1869 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb) 1870 { 1871 __tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr); 1872 } 1873 1874 const struct inet_connection_sock_af_ops ipv6_specific = { 1875 .queue_xmit = inet6_csk_xmit, 1876 .send_check = tcp_v6_send_check, 1877 .rebuild_header = inet6_sk_rebuild_header, 1878 .sk_rx_dst_set = inet6_sk_rx_dst_set, 1879 .conn_request = tcp_v6_conn_request, 1880 .syn_recv_sock = tcp_v6_syn_recv_sock, 1881 .net_header_len = sizeof(struct ipv6hdr), 1882 .net_frag_header_len = sizeof(struct frag_hdr), 1883 .setsockopt = ipv6_setsockopt, 1884 .getsockopt = ipv6_getsockopt, 1885 .addr2sockaddr = inet6_csk_addr2sockaddr, 1886 .sockaddr_len = sizeof(struct sockaddr_in6), 1887 .mtu_reduced = tcp_v6_mtu_reduced, 1888 }; 1889 1890 #ifdef CONFIG_TCP_MD5SIG 1891 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = { 1892 .md5_lookup = tcp_v6_md5_lookup, 1893 .calc_md5_hash = tcp_v6_md5_hash_skb, 1894 .md5_parse = tcp_v6_parse_md5_keys, 1895 }; 1896 #endif 1897 1898 /* 1899 * TCP over IPv4 via INET6 API 1900 */ 1901 static const struct inet_connection_sock_af_ops ipv6_mapped = { 1902 .queue_xmit = ip_queue_xmit, 1903 .send_check = tcp_v4_send_check, 1904 .rebuild_header = inet_sk_rebuild_header, 1905 .sk_rx_dst_set = inet_sk_rx_dst_set, 1906 .conn_request = tcp_v6_conn_request, 1907 .syn_recv_sock = tcp_v6_syn_recv_sock, 1908 .net_header_len = sizeof(struct iphdr), 1909 .setsockopt = ipv6_setsockopt, 1910 .getsockopt = ipv6_getsockopt, 1911 .addr2sockaddr = inet6_csk_addr2sockaddr, 1912 .sockaddr_len = sizeof(struct sockaddr_in6), 1913 .mtu_reduced = tcp_v4_mtu_reduced, 1914 }; 1915 1916 #ifdef CONFIG_TCP_MD5SIG 1917 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = { 1918 .md5_lookup = tcp_v4_md5_lookup, 1919 .calc_md5_hash = tcp_v4_md5_hash_skb, 1920 .md5_parse = tcp_v6_parse_md5_keys, 1921 }; 1922 #endif 1923 1924 /* NOTE: A lot of things set to zero explicitly by call to 1925 * sk_alloc() so need not be done here. 
1926 */ 1927 static int tcp_v6_init_sock(struct sock *sk) 1928 { 1929 struct inet_connection_sock *icsk = inet_csk(sk); 1930 1931 tcp_init_sock(sk); 1932 1933 icsk->icsk_af_ops = &ipv6_specific; 1934 1935 #ifdef CONFIG_TCP_MD5SIG 1936 tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific; 1937 #endif 1938 1939 return 0; 1940 } 1941 1942 static void tcp_v6_destroy_sock(struct sock *sk) 1943 { 1944 tcp_v4_destroy_sock(sk); 1945 inet6_destroy_sock(sk); 1946 } 1947 1948 #ifdef CONFIG_PROC_FS 1949 /* Proc filesystem TCPv6 sock list dumping. */ 1950 static void get_openreq6(struct seq_file *seq, 1951 const struct request_sock *req, int i) 1952 { 1953 long ttd = req->rsk_timer.expires - jiffies; 1954 const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr; 1955 const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr; 1956 1957 if (ttd < 0) 1958 ttd = 0; 1959 1960 seq_printf(seq, 1961 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " 1962 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n", 1963 i, 1964 src->s6_addr32[0], src->s6_addr32[1], 1965 src->s6_addr32[2], src->s6_addr32[3], 1966 inet_rsk(req)->ir_num, 1967 dest->s6_addr32[0], dest->s6_addr32[1], 1968 dest->s6_addr32[2], dest->s6_addr32[3], 1969 ntohs(inet_rsk(req)->ir_rmt_port), 1970 TCP_SYN_RECV, 1971 0, 0, /* could print option size, but that is af dependent. */ 1972 1, /* timers active (only the expire timer) */ 1973 jiffies_to_clock_t(ttd), 1974 req->num_timeout, 1975 from_kuid_munged(seq_user_ns(seq), 1976 sock_i_uid(req->rsk_listener)), 1977 0, /* non standard timer */ 1978 0, /* open_requests have no inode */ 1979 0, req); 1980 } 1981 1982 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) 1983 { 1984 const struct in6_addr *dest, *src; 1985 __u16 destp, srcp; 1986 int timer_active; 1987 unsigned long timer_expires; 1988 const struct inet_sock *inet = inet_sk(sp); 1989 const struct tcp_sock *tp = tcp_sk(sp); 1990 const struct inet_connection_sock *icsk = inet_csk(sp); 1991 const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq; 1992 int rx_queue; 1993 int state; 1994 1995 dest = &sp->sk_v6_daddr; 1996 src = &sp->sk_v6_rcv_saddr; 1997 destp = ntohs(inet->inet_dport); 1998 srcp = ntohs(inet->inet_sport); 1999 2000 if (icsk->icsk_pending == ICSK_TIME_RETRANS || 2001 icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT || 2002 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { 2003 timer_active = 1; 2004 timer_expires = icsk->icsk_timeout; 2005 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { 2006 timer_active = 4; 2007 timer_expires = icsk->icsk_timeout; 2008 } else if (timer_pending(&sp->sk_timer)) { 2009 timer_active = 2; 2010 timer_expires = sp->sk_timer.expires; 2011 } else { 2012 timer_active = 0; 2013 timer_expires = jiffies; 2014 } 2015 2016 state = inet_sk_state_load(sp); 2017 if (state == TCP_LISTEN) 2018 rx_queue = READ_ONCE(sp->sk_ack_backlog); 2019 else 2020 /* Because we don't lock the socket, 2021 * we might find a transient negative value. 
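		 * (rcv_nxt and copied_seq are read without synchronization,
		 * hence the clamp to zero in the max_t() below.)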
static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int timer_active;
	unsigned long timer_expires;
	const struct inet_sock *inet = inet_sk(sp);
	const struct tcp_sock *tp = tcp_sk(sp);
	const struct inet_connection_sock *icsk = inet_csk(sp);
	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
	int rx_queue;
	int state;

	dest  = &sp->sk_v6_daddr;
	src   = &sp->sk_v6_rcv_saddr;
	destp = ntohs(inet->inet_dport);
	srcp  = ntohs(inet->inet_sport);

	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
		timer_active	= 1;
		timer_expires	= icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= icsk->icsk_timeout;
	} else if (timer_pending(&sp->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sp->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires	= jiffies;
	}

	state = inet_sk_state_load(sp);
	if (state == TCP_LISTEN)
		rx_queue = READ_ONCE(sp->sk_ack_backlog);
	else
		/* Because we don't lock the socket,
		 * we might find a transient negative value.
		 */
		rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
				      READ_ONCE(tp->copied_seq), 0);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   state,
		   READ_ONCE(tp->write_seq) - tp->snd_una,
		   rx_queue,
		   timer_active,
		   jiffies_delta_to_clock_t(timer_expires - jiffies),
		   icsk->icsk_retransmits,
		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
		   icsk->icsk_probes_out,
		   sock_i_ino(sp),
		   refcount_read(&sp->sk_refcnt), sp,
		   jiffies_to_clock_t(icsk->icsk_rto),
		   jiffies_to_clock_t(icsk->icsk_ack.ato),
		   (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
		   tcp_snd_cwnd(tp),
		   state == TCP_LISTEN ?
			fastopenq->max_qlen :
			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
		   );
}

static void get_timewait6_sock(struct seq_file *seq,
			       struct inet_timewait_sock *tw, int i)
{
	long delta = tw->tw_timer.expires - jiffies;
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;

	dest = &tw->tw_v6_daddr;
	src  = &tw->tw_v6_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   tw->tw_substate, 0, 0,
		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
		   refcount_read(&tw->tw_refcnt), tw);
}

static int tcp6_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
	struct sock *sk = v;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "  sl  "
			 "local_address                         "
			 "remote_address                        "
			 "st tx_queue rx_queue tr tm->when retrnsmt"
			 "   uid  timeout inode\n");
		goto out;
	}
	st = seq->private;

	if (sk->sk_state == TCP_TIME_WAIT)
		get_timewait6_sock(seq, v, st->num);
	else if (sk->sk_state == TCP_NEW_SYN_RECV)
		get_openreq6(seq, v, st->num);
	else
		get_tcp6_sock(seq, v, st->num);
out:
	return 0;
}

static const struct seq_operations tcp6_seq_ops = {
	.show		= tcp6_seq_show,
	.start		= tcp_seq_start,
	.next		= tcp_seq_next,
	.stop		= tcp_seq_stop,
};

static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.family		= AF_INET6,
};

int __net_init tcp6_proc_init(struct net *net)
{
	if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
			sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
		return -ENOMEM;
	return 0;
}

void tcp6_proc_exit(struct net *net)
{
	remove_proc_entry("tcp6", net->proc_net);
}
#endif

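/* The AF_INET6 TCP proto.  Most handlers here are the same functions used
 * by IPv4 TCP; the IPv6-specific pieces are the pre_connect/connect,
 * init/destroy, backlog receive (tcp_v6_do_rcv()) and inet6_hash()
 * callbacks, plus the tcp6_sock, tcp6 request-sock and tcp6 time-wait
 * object sizes that carry the extra IPv6 state.
 */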
struct proto tcpv6_prot = {
	.name			= "TCPv6",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.pre_connect		= tcp_v6_pre_connect,
	.connect		= tcp_v6_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v6_init_sock,
	.destroy		= tcp_v6_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.bpf_bypass_getsockopt	= tcp_bpf_bypass_getsockopt,
	.keepalive		= tcp_set_keepalive,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.sendpage		= tcp_sendpage,
	.backlog_rcv		= tcp_v6_do_rcv,
	.release_cb		= tcp_release_cb,
	.hash			= inet6_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.put_port		= inet_put_port,
#ifdef CONFIG_BPF_SYSCALL
	.psock_update_sk_prot	= tcp_bpf_update_proto,
#endif
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.leave_memory_pressure	= tcp_leave_memory_pressure,
	.stream_memory_free	= tcp_stream_memory_free,
	.sockets_allocated	= &tcp_sockets_allocated,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.orphan_count		= &tcp_orphan_count,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp6_sock),
	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
	.twsk_prot		= &tcp6_timewait_sock_ops,
	.rsk_prot		= &tcp6_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
	.no_autobind		= true,
	.diag_destroy		= tcp_abort,
};
EXPORT_SYMBOL_GPL(tcpv6_prot);

/* thinking of making this const? Don't.
 * early_demux can change based on sysctl.
 */
static struct inet6_protocol tcpv6_protocol = {
	.early_demux		= tcp_v6_early_demux,
	.early_demux_handler	= tcp_v6_early_demux,
	.handler		= tcp_v6_rcv,
	.err_handler		= tcp_v6_err,
	.flags			= INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};

static struct inet_protosw tcpv6_protosw = {
	.type		= SOCK_STREAM,
	.protocol	= IPPROTO_TCP,
	.prot		= &tcpv6_prot,
	.ops		= &inet6_stream_ops,
	.flags		= INET_PROTOSW_PERMANENT |
			  INET_PROTOSW_ICSK,
};

static int __net_init tcpv6_net_init(struct net *net)
{
	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
				    SOCK_RAW, IPPROTO_TCP, net);
}

static void __net_exit tcpv6_net_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
}

static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
{
	inet_twsk_purge(&tcp_hashinfo, AF_INET6);
}

static struct pernet_operations tcpv6_net_ops = {
	.init	    = tcpv6_net_init,
	.exit	    = tcpv6_net_exit,
	.exit_batch = tcpv6_net_exit_batch,
};

int __init tcpv6_init(void)
{
	int ret;

	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
	if (ret)
		goto out;

	/* register inet6 protocol */
	ret = inet6_register_protosw(&tcpv6_protosw);
	if (ret)
		goto out_tcpv6_protocol;

	ret = register_pernet_subsys(&tcpv6_net_ops);
	if (ret)
		goto out_tcpv6_protosw;

	ret = mptcpv6_init();
	if (ret)
		goto out_tcpv6_pernet_subsys;

out:
	return ret;

out_tcpv6_pernet_subsys:
	unregister_pernet_subsys(&tcpv6_net_ops);
out_tcpv6_protosw:
	inet6_unregister_protosw(&tcpv6_protosw);
out_tcpv6_protocol:
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
	goto out;
}

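/* Undo tcpv6_init(): unregister everything in the reverse order of
 * registration.
 */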
void tcpv6_exit(void)
{
	unregister_pernet_subsys(&tcpv6_net_ops);
	inet6_unregister_protosw(&tcpv6_protosw);
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
}