1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * TCP over IPv6 4 * Linux INET6 implementation 5 * 6 * Authors: 7 * Pedro Roque <roque@di.fc.ul.pt> 8 * 9 * Based on: 10 * linux/net/ipv4/tcp.c 11 * linux/net/ipv4/tcp_input.c 12 * linux/net/ipv4/tcp_output.c 13 * 14 * Fixes: 15 * Hideaki YOSHIFUJI : sin6_scope_id support 16 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which 17 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind 18 * a single port at the same time. 19 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file. 20 */ 21 22 #include <linux/bottom_half.h> 23 #include <linux/module.h> 24 #include <linux/errno.h> 25 #include <linux/types.h> 26 #include <linux/socket.h> 27 #include <linux/sockios.h> 28 #include <linux/net.h> 29 #include <linux/jiffies.h> 30 #include <linux/in.h> 31 #include <linux/in6.h> 32 #include <linux/netdevice.h> 33 #include <linux/init.h> 34 #include <linux/jhash.h> 35 #include <linux/ipsec.h> 36 #include <linux/times.h> 37 #include <linux/slab.h> 38 #include <linux/uaccess.h> 39 #include <linux/ipv6.h> 40 #include <linux/icmpv6.h> 41 #include <linux/random.h> 42 #include <linux/indirect_call_wrapper.h> 43 44 #include <net/aligned_data.h> 45 #include <net/tcp.h> 46 #include <net/ndisc.h> 47 #include <net/inet6_hashtables.h> 48 #include <net/inet6_connection_sock.h> 49 #include <net/ipv6.h> 50 #include <net/transp_v6.h> 51 #include <net/addrconf.h> 52 #include <net/ip6_route.h> 53 #include <net/ip6_checksum.h> 54 #include <net/inet_ecn.h> 55 #include <net/protocol.h> 56 #include <net/xfrm.h> 57 #include <net/snmp.h> 58 #include <net/dsfield.h> 59 #include <net/timewait_sock.h> 60 #include <net/inet_common.h> 61 #include <net/secure_seq.h> 62 #include <net/hotdata.h> 63 #include <net/busy_poll.h> 64 #include <net/rstreason.h> 65 #include <net/psp.h> 66 67 #include <linux/proc_fs.h> 68 #include <linux/seq_file.h> 69 70 #include <crypto/md5.h> 71 72 #include <trace/events/tcp.h> 73 74 static void 
tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb, 75 enum sk_rst_reason reason); 76 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, 77 struct request_sock *req); 78 79 INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb); 80 81 static const struct inet_connection_sock_af_ops ipv6_mapped; 82 const struct inet_connection_sock_af_ops ipv6_specific; 83 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) 84 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific; 85 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific; 86 #endif 87 88 /* Helper returning the inet6 address from a given tcp socket. 89 * It can be used in TCP stack instead of inet6_sk(sk). 90 * This avoids a dereference and allow compiler optimizations. 91 * It is a specialized version of inet6_sk_generic(). 92 */ 93 #define tcp_inet6_sk(sk) (&container_of_const(tcp_sk(sk), \ 94 struct tcp6_sock, tcp)->inet6) 95 96 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) 97 { 98 struct dst_entry *dst = skb_dst(skb); 99 100 if (dst && dst_hold_safe(dst)) { 101 rcu_assign_pointer(sk->sk_rx_dst, dst); 102 sk->sk_rx_dst_ifindex = skb->skb_iif; 103 sk->sk_rx_dst_cookie = rt6_get_cookie(dst_rt6_info(dst)); 104 } 105 } 106 107 static u32 tcp_v6_init_seq(const struct sk_buff *skb) 108 { 109 return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32, 110 ipv6_hdr(skb)->saddr.s6_addr32, 111 tcp_hdr(skb)->dest, 112 tcp_hdr(skb)->source); 113 } 114 115 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb) 116 { 117 return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32, 118 ipv6_hdr(skb)->saddr.s6_addr32); 119 } 120 121 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr_unsized *uaddr, 122 int addr_len) 123 { 124 /* This check is replicated from tcp_v6_connect() and intended to 125 * prevent BPF program called below from accessing bytes that are out 126 * of the bound 
specified by user in addr_len. 127 */ 128 if (addr_len < SIN6_LEN_RFC2133) 129 return -EINVAL; 130 131 sock_owned_by_me(sk); 132 133 return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, &addr_len); 134 } 135 136 static int tcp_v6_connect(struct sock *sk, struct sockaddr_unsized *uaddr, 137 int addr_len) 138 { 139 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; 140 struct inet_connection_sock *icsk = inet_csk(sk); 141 struct inet_timewait_death_row *tcp_death_row; 142 struct ipv6_pinfo *np = tcp_inet6_sk(sk); 143 struct in6_addr *saddr = NULL, *final_p; 144 struct inet_sock *inet = inet_sk(sk); 145 struct tcp_sock *tp = tcp_sk(sk); 146 struct net *net = sock_net(sk); 147 struct ipv6_txoptions *opt; 148 struct dst_entry *dst; 149 struct flowi6 *fl6; 150 int addr_type; 151 int err; 152 153 if (addr_len < SIN6_LEN_RFC2133) 154 return -EINVAL; 155 156 if (usin->sin6_family != AF_INET6) 157 return -EAFNOSUPPORT; 158 159 fl6 = &inet_sk(sk)->cork.fl.u.ip6; 160 memset(fl6, 0, sizeof(*fl6)); 161 162 if (inet6_test_bit(SNDFLOW, sk)) { 163 fl6->flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK; 164 IP6_ECN_flow_init(fl6->flowlabel); 165 if (fl6->flowlabel & IPV6_FLOWLABEL_MASK) { 166 struct ip6_flowlabel *flowlabel; 167 flowlabel = fl6_sock_lookup(sk, fl6->flowlabel); 168 if (IS_ERR(flowlabel)) 169 return -EINVAL; 170 fl6_sock_release(flowlabel); 171 } 172 } 173 174 /* 175 * connect() to INADDR_ANY means loopback (BSD'ism). 176 */ 177 178 if (ipv6_addr_any(&usin->sin6_addr)) { 179 if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) 180 ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK), 181 &usin->sin6_addr); 182 else 183 usin->sin6_addr = in6addr_loopback; 184 } 185 186 addr_type = ipv6_addr_type(&usin->sin6_addr); 187 188 if (addr_type & IPV6_ADDR_MULTICAST) 189 return -ENETUNREACH; 190 191 if (addr_type&IPV6_ADDR_LINKLOCAL) { 192 if (addr_len >= sizeof(struct sockaddr_in6) && 193 usin->sin6_scope_id) { 194 /* If interface is set while binding, indices 195 * must coincide. 
196 */ 197 if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id)) 198 return -EINVAL; 199 200 sk->sk_bound_dev_if = usin->sin6_scope_id; 201 } 202 203 /* Connect to link-local address requires an interface */ 204 if (!sk->sk_bound_dev_if) 205 return -EINVAL; 206 } 207 208 if (tp->rx_opt.ts_recent_stamp && 209 !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) { 210 tp->rx_opt.ts_recent = 0; 211 tp->rx_opt.ts_recent_stamp = 0; 212 WRITE_ONCE(tp->write_seq, 0); 213 } 214 215 sk->sk_v6_daddr = usin->sin6_addr; 216 np->flow_label = fl6->flowlabel; 217 218 /* 219 * TCP over IPv4 220 */ 221 222 if (addr_type & IPV6_ADDR_MAPPED) { 223 u32 exthdrlen = icsk->icsk_ext_hdr_len; 224 struct sockaddr_in sin; 225 226 if (ipv6_only_sock(sk)) 227 return -ENETUNREACH; 228 229 sin.sin_family = AF_INET; 230 sin.sin_port = usin->sin6_port; 231 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3]; 232 233 /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */ 234 WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped); 235 if (sk_is_mptcp(sk)) 236 mptcpv6_handle_mapped(sk, true); 237 sk->sk_backlog_rcv = tcp_v4_do_rcv; 238 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) 239 tp->af_specific = &tcp_sock_ipv6_mapped_specific; 240 #endif 241 242 err = tcp_v4_connect(sk, (struct sockaddr_unsized *)&sin, sizeof(sin)); 243 244 if (err) { 245 icsk->icsk_ext_hdr_len = exthdrlen; 246 /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */ 247 WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific); 248 if (sk_is_mptcp(sk)) 249 mptcpv6_handle_mapped(sk, false); 250 sk->sk_backlog_rcv = tcp_v6_do_rcv; 251 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) 252 tp->af_specific = &tcp_sock_ipv6_specific; 253 #endif 254 goto failure; 255 } 256 np->saddr = sk->sk_v6_rcv_saddr; 257 258 return err; 259 } 260 261 if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) 262 saddr = &sk->sk_v6_rcv_saddr; 263 264 fl6->flowi6_proto = IPPROTO_TCP; 265 fl6->daddr = sk->sk_v6_daddr; 266 fl6->saddr = saddr ? 
*saddr : np->saddr; 267 fl6->flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label); 268 fl6->flowi6_oif = sk->sk_bound_dev_if; 269 fl6->flowi6_mark = sk->sk_mark; 270 fl6->fl6_dport = usin->sin6_port; 271 fl6->fl6_sport = inet->inet_sport; 272 if (IS_ENABLED(CONFIG_IP_ROUTE_MULTIPATH) && !fl6->fl6_sport) 273 fl6->flowi6_flags = FLOWI_FLAG_ANY_SPORT; 274 fl6->flowi6_uid = sk_uid(sk); 275 276 opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk)); 277 final_p = fl6_update_dst(fl6, opt, &np->final); 278 279 security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6)); 280 281 dst = ip6_dst_lookup_flow(net, sk, fl6, final_p); 282 if (IS_ERR(dst)) { 283 err = PTR_ERR(dst); 284 goto failure; 285 } 286 287 tp->tcp_usec_ts = dst_tcp_usec_ts(dst); 288 tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row; 289 290 if (!saddr) { 291 saddr = &fl6->saddr; 292 293 err = inet_bhash2_update_saddr(sk, saddr, AF_INET6); 294 if (err) 295 goto failure; 296 } 297 298 /* set the source address */ 299 np->saddr = *saddr; 300 inet->inet_rcv_saddr = LOOPBACK4_IPV6; 301 302 sk->sk_gso_type = SKB_GSO_TCPV6; 303 ip6_dst_store(sk, dst, false, false); 304 305 icsk->icsk_ext_hdr_len = psp_sk_overhead(sk); 306 if (opt) 307 icsk->icsk_ext_hdr_len += opt->opt_flen + 308 opt->opt_nflen; 309 310 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); 311 312 inet->inet_dport = usin->sin6_port; 313 314 tcp_set_state(sk, TCP_SYN_SENT); 315 err = inet6_hash_connect(tcp_death_row, sk); 316 if (err) 317 goto late_failure; 318 319 sk_set_txhash(sk); 320 321 if (likely(!tp->repair)) { 322 if (!tp->write_seq) 323 WRITE_ONCE(tp->write_seq, 324 secure_tcpv6_seq(np->saddr.s6_addr32, 325 sk->sk_v6_daddr.s6_addr32, 326 inet->inet_sport, 327 inet->inet_dport)); 328 tp->tsoffset = secure_tcpv6_ts_off(net, np->saddr.s6_addr32, 329 sk->sk_v6_daddr.s6_addr32); 330 } 331 332 if (tcp_fastopen_defer_connect(sk, &err)) 333 return err; 334 if (err) 335 goto late_failure; 336 337 err 
= tcp_connect(sk); 338 if (err) 339 goto late_failure; 340 341 return 0; 342 343 late_failure: 344 tcp_set_state(sk, TCP_CLOSE); 345 inet_bhash2_reset_saddr(sk); 346 failure: 347 inet->inet_dport = 0; 348 sk->sk_route_caps = 0; 349 return err; 350 } 351 352 static void tcp_v6_mtu_reduced(struct sock *sk) 353 { 354 struct dst_entry *dst; 355 u32 mtu, dmtu; 356 357 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) 358 return; 359 360 mtu = READ_ONCE(tcp_sk(sk)->mtu_info); 361 362 /* Drop requests trying to increase our current mss. 363 * Check done in __ip6_rt_update_pmtu() is too late. 364 */ 365 if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache) 366 return; 367 368 dst = inet6_csk_update_pmtu(sk, mtu); 369 if (!dst) 370 return; 371 372 dmtu = dst6_mtu(dst); 373 if (inet_csk(sk)->icsk_pmtu_cookie > dmtu) { 374 tcp_sync_mss(sk, dmtu); 375 tcp_simple_retransmit(sk); 376 } 377 } 378 379 static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, 380 u8 type, u8 code, int offset, __be32 info) 381 { 382 const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data; 383 const struct tcphdr *th = (struct tcphdr *)(skb->data+offset); 384 struct net *net = dev_net_rcu(skb->dev); 385 struct request_sock *fastopen; 386 struct ipv6_pinfo *np; 387 struct tcp_sock *tp; 388 __u32 seq, snd_una; 389 struct sock *sk; 390 bool fatal; 391 int err; 392 393 sk = __inet6_lookup_established(net, &hdr->daddr, th->dest, 394 &hdr->saddr, ntohs(th->source), 395 skb->dev->ifindex, inet6_sdif(skb)); 396 397 if (!sk) { 398 __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), 399 ICMP6_MIB_INERRORS); 400 return -ENOENT; 401 } 402 403 if (sk->sk_state == TCP_TIME_WAIT) { 404 /* To increase the counter of ignored icmps for TCP-AO */ 405 tcp_ao_ignore_icmp(sk, AF_INET6, type, code); 406 inet_twsk_put(inet_twsk(sk)); 407 return 0; 408 } 409 seq = ntohl(th->seq); 410 fatal = icmpv6_err_convert(type, code, &err); 411 if (sk->sk_state == TCP_NEW_SYN_RECV) { 412 tcp_req_err(sk, seq, fatal); 
413 return 0; 414 } 415 416 if (tcp_ao_ignore_icmp(sk, AF_INET6, type, code)) { 417 sock_put(sk); 418 return 0; 419 } 420 421 bh_lock_sock(sk); 422 if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG) 423 __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS); 424 425 if (sk->sk_state == TCP_CLOSE) 426 goto out; 427 428 if (static_branch_unlikely(&ip6_min_hopcount)) { 429 /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */ 430 if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) { 431 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); 432 goto out; 433 } 434 } 435 436 tp = tcp_sk(sk); 437 /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */ 438 fastopen = rcu_dereference(tp->fastopen_rsk); 439 snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una; 440 if (sk->sk_state != TCP_LISTEN && 441 !between(seq, snd_una, tp->snd_nxt)) { 442 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS); 443 goto out; 444 } 445 446 np = tcp_inet6_sk(sk); 447 448 if (type == NDISC_REDIRECT) { 449 if (!sock_owned_by_user(sk)) { 450 struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); 451 452 if (dst) 453 dst->ops->redirect(dst, sk, skb); 454 } 455 goto out; 456 } 457 458 if (type == ICMPV6_PKT_TOOBIG) { 459 u32 mtu = ntohl(info); 460 461 /* We are not interested in TCP_LISTEN and open_requests 462 * (SYN-ACKs send out by Linux are always <576bytes so 463 * they should go through unfragmented). 
464 */ 465 if (sk->sk_state == TCP_LISTEN) 466 goto out; 467 468 if (!ip6_sk_accept_pmtu(sk)) 469 goto out; 470 471 if (mtu < IPV6_MIN_MTU) 472 goto out; 473 474 WRITE_ONCE(tp->mtu_info, mtu); 475 476 if (!sock_owned_by_user(sk)) 477 tcp_v6_mtu_reduced(sk); 478 else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, 479 &sk->sk_tsq_flags)) 480 sock_hold(sk); 481 goto out; 482 } 483 484 485 /* Might be for an request_sock */ 486 switch (sk->sk_state) { 487 case TCP_SYN_SENT: 488 case TCP_SYN_RECV: 489 /* Only in fast or simultaneous open. If a fast open socket is 490 * already accepted it is treated as a connected one below. 491 */ 492 if (fastopen && !fastopen->sk) 493 break; 494 495 ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th); 496 497 if (!sock_owned_by_user(sk)) 498 tcp_done_with_error(sk, err); 499 else 500 WRITE_ONCE(sk->sk_err_soft, err); 501 goto out; 502 case TCP_LISTEN: 503 break; 504 default: 505 /* check if this ICMP message allows revert of backoff. 506 * (see RFC 6069) 507 */ 508 if (!fastopen && type == ICMPV6_DEST_UNREACH && 509 code == ICMPV6_NOROUTE) 510 tcp_ld_RTO_revert(sk, seq); 511 } 512 513 if (!sock_owned_by_user(sk) && inet6_test_bit(RECVERR6, sk)) { 514 WRITE_ONCE(sk->sk_err, err); 515 sk_error_report(sk); 516 } else { 517 WRITE_ONCE(sk->sk_err_soft, err); 518 } 519 out: 520 bh_unlock_sock(sk); 521 sock_put(sk); 522 return 0; 523 } 524 525 526 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, 527 struct flowi *fl, 528 struct request_sock *req, 529 struct tcp_fastopen_cookie *foc, 530 enum tcp_synack_type synack_type, 531 struct sk_buff *syn_skb) 532 { 533 struct inet_request_sock *ireq = inet_rsk(req); 534 const struct ipv6_pinfo *np = tcp_inet6_sk(sk); 535 struct ipv6_txoptions *opt; 536 struct flowi6 *fl6 = &fl->u.ip6; 537 struct sk_buff *skb; 538 int err = -ENOMEM; 539 u8 tclass; 540 541 /* First, grab a route. 
*/ 542 if (!dst && (dst = inet6_csk_route_req(sk, NULL, fl6, req, 543 IPPROTO_TCP)) == NULL) 544 goto done; 545 546 skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb); 547 548 if (skb) { 549 tcp_rsk(req)->syn_ect_snt = np->tclass & INET_ECN_MASK; 550 __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr, 551 &ireq->ir_v6_rmt_addr); 552 553 fl6->daddr = ireq->ir_v6_rmt_addr; 554 if (inet6_test_bit(REPFLOW, sk) && ireq->pktopts) 555 fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts)); 556 557 tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ? 558 (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) | 559 (np->tclass & INET_ECN_MASK) : 560 np->tclass; 561 562 if (!INET_ECN_is_capable(tclass) && 563 tcp_bpf_ca_needs_ecn((struct sock *)req)) 564 tclass |= INET_ECN_ECT_0; 565 566 rcu_read_lock(); 567 opt = ireq->ipv6_opt; 568 if (!opt) 569 opt = rcu_dereference(np->opt); 570 err = ip6_xmit(sk, skb, fl6, skb->mark ? : READ_ONCE(sk->sk_mark), 571 opt, tclass, READ_ONCE(sk->sk_priority)); 572 rcu_read_unlock(); 573 err = net_xmit_eval(err); 574 } 575 576 done: 577 return err; 578 } 579 580 581 static void tcp_v6_reqsk_destructor(struct request_sock *req) 582 { 583 kfree(inet_rsk(req)->ipv6_opt); 584 consume_skb(inet_rsk(req)->pktopts); 585 } 586 587 #ifdef CONFIG_TCP_MD5SIG 588 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk, 589 const struct in6_addr *addr, 590 int l3index) 591 { 592 return tcp_md5_do_lookup(sk, l3index, 593 (union tcp_md5_addr *)addr, AF_INET6); 594 } 595 596 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk, 597 const struct sock *addr_sk) 598 { 599 int l3index; 600 601 l3index = l3mdev_master_ifindex_by_index(sock_net(sk), 602 addr_sk->sk_bound_dev_if); 603 return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr, 604 l3index); 605 } 606 607 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname, 608 sockptr_t optval, int optlen) 609 { 610 struct tcp_md5sig cmd; 611 struct sockaddr_in6 *sin6 = 
(struct sockaddr_in6 *)&cmd.tcpm_addr; 612 union tcp_ao_addr *addr; 613 int l3index = 0; 614 u8 prefixlen; 615 bool l3flag; 616 u8 flags; 617 618 if (optlen < sizeof(cmd)) 619 return -EINVAL; 620 621 if (copy_from_sockptr(&cmd, optval, sizeof(cmd))) 622 return -EFAULT; 623 624 if (sin6->sin6_family != AF_INET6) 625 return -EINVAL; 626 627 flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX; 628 l3flag = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX; 629 630 if (optname == TCP_MD5SIG_EXT && 631 cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) { 632 prefixlen = cmd.tcpm_prefixlen; 633 if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) && 634 prefixlen > 32)) 635 return -EINVAL; 636 } else { 637 prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128; 638 } 639 640 if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex && 641 cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) { 642 struct net_device *dev; 643 644 rcu_read_lock(); 645 dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex); 646 if (dev && netif_is_l3_master(dev)) 647 l3index = dev->ifindex; 648 rcu_read_unlock(); 649 650 /* ok to reference set/not set outside of rcu; 651 * right now device MUST be an L3 master 652 */ 653 if (!dev || !l3index) 654 return -EINVAL; 655 } 656 657 if (!cmd.tcpm_keylen) { 658 if (ipv6_addr_v4mapped(&sin6->sin6_addr)) 659 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3], 660 AF_INET, prefixlen, 661 l3index, flags); 662 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr, 663 AF_INET6, prefixlen, l3index, flags); 664 } 665 666 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN) 667 return -EINVAL; 668 669 if (ipv6_addr_v4mapped(&sin6->sin6_addr)) { 670 addr = (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3]; 671 672 /* Don't allow keys for peers that have a matching TCP-AO key. 673 * See the comment in tcp_ao_add_cmd() 674 */ 675 if (tcp_ao_required(sk, addr, AF_INET, 676 l3flag ? 
l3index : -1, false)) 677 return -EKEYREJECTED; 678 return tcp_md5_do_add(sk, addr, 679 AF_INET, prefixlen, l3index, flags, 680 cmd.tcpm_key, cmd.tcpm_keylen); 681 } 682 683 addr = (union tcp_md5_addr *)&sin6->sin6_addr; 684 685 /* Don't allow keys for peers that have a matching TCP-AO key. 686 * See the comment in tcp_ao_add_cmd() 687 */ 688 if (tcp_ao_required(sk, addr, AF_INET6, l3flag ? l3index : -1, false)) 689 return -EKEYREJECTED; 690 691 return tcp_md5_do_add(sk, addr, AF_INET6, prefixlen, l3index, flags, 692 cmd.tcpm_key, cmd.tcpm_keylen); 693 } 694 695 static void tcp_v6_md5_hash_headers(struct md5_ctx *ctx, 696 const struct in6_addr *daddr, 697 const struct in6_addr *saddr, 698 const struct tcphdr *th, int nbytes) 699 { 700 struct { 701 struct tcp6_pseudohdr ip; /* TCP pseudo-header (RFC2460) */ 702 struct tcphdr tcp; 703 } h; 704 705 h.ip.saddr = *saddr; 706 h.ip.daddr = *daddr; 707 h.ip.protocol = cpu_to_be32(IPPROTO_TCP); 708 h.ip.len = cpu_to_be32(nbytes); 709 h.tcp = *th; 710 h.tcp.check = 0; 711 md5_update(ctx, (const u8 *)&h, sizeof(h.ip) + sizeof(h.tcp)); 712 } 713 714 static noinline_for_stack void 715 tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, 716 const struct in6_addr *daddr, struct in6_addr *saddr, 717 const struct tcphdr *th) 718 { 719 struct md5_ctx ctx; 720 721 md5_init(&ctx); 722 tcp_v6_md5_hash_headers(&ctx, daddr, saddr, th, th->doff << 2); 723 tcp_md5_hash_key(&ctx, key); 724 md5_final(&ctx, md5_hash); 725 } 726 727 static noinline_for_stack void 728 tcp_v6_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key, 729 const struct sock *sk, const struct sk_buff *skb) 730 { 731 const struct tcphdr *th = tcp_hdr(skb); 732 const struct in6_addr *saddr, *daddr; 733 struct md5_ctx ctx; 734 735 if (sk) { /* valid for establish/request sockets */ 736 saddr = &sk->sk_v6_rcv_saddr; 737 daddr = &sk->sk_v6_daddr; 738 } else { 739 const struct ipv6hdr *ip6h = ipv6_hdr(skb); 740 saddr = &ip6h->saddr; 741 daddr = 
&ip6h->daddr; 742 } 743 744 md5_init(&ctx); 745 tcp_v6_md5_hash_headers(&ctx, daddr, saddr, th, skb->len); 746 tcp_md5_hash_skb_data(&ctx, skb, th->doff << 2); 747 tcp_md5_hash_key(&ctx, key); 748 md5_final(&ctx, md5_hash); 749 } 750 #endif 751 752 static void tcp_v6_init_req(struct request_sock *req, 753 const struct sock *sk_listener, 754 struct sk_buff *skb, 755 u32 tw_isn) 756 { 757 bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags); 758 struct inet_request_sock *ireq = inet_rsk(req); 759 const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener); 760 761 ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; 762 ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; 763 ireq->ir_rmt_addr = LOOPBACK4_IPV6; 764 ireq->ir_loc_addr = LOOPBACK4_IPV6; 765 766 /* So that link locals have meaning */ 767 if ((!sk_listener->sk_bound_dev_if || l3_slave) && 768 ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) 769 ireq->ir_iif = tcp_v6_iif(skb); 770 771 if (!tw_isn && 772 (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) || 773 np->rxopt.bits.rxinfo || 774 np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || 775 np->rxopt.bits.rxohlim || inet6_test_bit(REPFLOW, sk_listener))) { 776 refcount_inc(&skb->users); 777 ireq->pktopts = skb; 778 } 779 } 780 781 static struct dst_entry *tcp_v6_route_req(const struct sock *sk, 782 struct sk_buff *skb, 783 struct flowi *fl, 784 struct request_sock *req, 785 u32 tw_isn) 786 { 787 tcp_v6_init_req(req, sk, skb, tw_isn); 788 789 if (security_inet_conn_request(sk, skb, req)) 790 return NULL; 791 792 return inet6_csk_route_req(sk, NULL, &fl->u.ip6, req, IPPROTO_TCP); 793 } 794 795 struct request_sock_ops tcp6_request_sock_ops __read_mostly = { 796 .family = AF_INET6, 797 .obj_size = sizeof(struct tcp6_request_sock), 798 .send_ack = tcp_v6_reqsk_send_ack, 799 .destructor = tcp_v6_reqsk_destructor, 800 .send_reset = tcp_v6_send_reset, 801 }; 802 803 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { 804 
.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - 805 sizeof(struct ipv6hdr), 806 #ifdef CONFIG_TCP_MD5SIG 807 .req_md5_lookup = tcp_v6_md5_lookup, 808 .calc_md5_hash = tcp_v6_md5_hash_skb, 809 #endif 810 #ifdef CONFIG_TCP_AO 811 .ao_lookup = tcp_v6_ao_lookup_rsk, 812 .ao_calc_key = tcp_v6_ao_calc_key_rsk, 813 .ao_synack_hash = tcp_v6_ao_synack_hash, 814 #endif 815 #ifdef CONFIG_SYN_COOKIES 816 .cookie_init_seq = cookie_v6_init_sequence, 817 #endif 818 .route_req = tcp_v6_route_req, 819 .init_seq = tcp_v6_init_seq, 820 .init_ts_off = tcp_v6_init_ts_off, 821 .send_synack = tcp_v6_send_synack, 822 }; 823 824 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq, 825 u32 ack, u32 win, u32 tsval, u32 tsecr, 826 int oif, int rst, u8 tclass, __be32 label, 827 u32 priority, u32 txhash, struct tcp_key *key) 828 { 829 struct net *net = sk ? sock_net(sk) : skb_dst_dev_net_rcu(skb); 830 unsigned int tot_len = sizeof(struct tcphdr); 831 struct sock *ctl_sk = net->ipv6.tcp_sk; 832 const struct tcphdr *th = tcp_hdr(skb); 833 __be32 mrst = 0, *topt; 834 struct dst_entry *dst; 835 struct sk_buff *buff; 836 struct tcphdr *t1; 837 struct flowi6 fl6; 838 u32 mark = 0; 839 840 if (tsecr) 841 tot_len += TCPOLEN_TSTAMP_ALIGNED; 842 if (tcp_key_is_md5(key)) 843 tot_len += TCPOLEN_MD5SIG_ALIGNED; 844 if (tcp_key_is_ao(key)) 845 tot_len += tcp_ao_len_aligned(key->ao_key); 846 847 #ifdef CONFIG_MPTCP 848 if (rst && !tcp_key_is_md5(key)) { 849 mrst = mptcp_reset_option(skb); 850 851 if (mrst) 852 tot_len += sizeof(__be32); 853 } 854 #endif 855 856 buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC); 857 if (!buff) 858 return; 859 860 skb_reserve(buff, MAX_TCP_HEADER); 861 862 t1 = skb_push(buff, tot_len); 863 skb_reset_transport_header(buff); 864 865 /* Swap the send and the receive. 
*/ 866 memset(t1, 0, sizeof(*t1)); 867 t1->dest = th->source; 868 t1->source = th->dest; 869 t1->doff = tot_len / 4; 870 t1->seq = htonl(seq); 871 t1->ack_seq = htonl(ack); 872 t1->ack = !rst || !th->ack; 873 t1->rst = rst; 874 t1->window = htons(win); 875 876 topt = (__be32 *)(t1 + 1); 877 878 if (tsecr) { 879 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | 880 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); 881 *topt++ = htonl(tsval); 882 *topt++ = htonl(tsecr); 883 } 884 885 if (mrst) 886 *topt++ = mrst; 887 888 #ifdef CONFIG_TCP_MD5SIG 889 if (tcp_key_is_md5(key)) { 890 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | 891 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG); 892 tcp_v6_md5_hash_hdr((__u8 *)topt, key->md5_key, 893 &ipv6_hdr(skb)->saddr, 894 &ipv6_hdr(skb)->daddr, t1); 895 } 896 #endif 897 #ifdef CONFIG_TCP_AO 898 if (tcp_key_is_ao(key)) { 899 *topt++ = htonl((TCPOPT_AO << 24) | 900 (tcp_ao_len(key->ao_key) << 16) | 901 (key->ao_key->sndid << 8) | 902 (key->rcv_next)); 903 904 tcp_ao_hash_hdr(AF_INET6, (char *)topt, key->ao_key, 905 key->traffic_key, 906 (union tcp_ao_addr *)&ipv6_hdr(skb)->saddr, 907 (union tcp_ao_addr *)&ipv6_hdr(skb)->daddr, 908 t1, key->sne); 909 } 910 #endif 911 912 memset(&fl6, 0, sizeof(fl6)); 913 fl6.daddr = ipv6_hdr(skb)->saddr; 914 fl6.saddr = ipv6_hdr(skb)->daddr; 915 fl6.flowlabel = label; 916 917 buff->ip_summed = CHECKSUM_PARTIAL; 918 919 __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr); 920 921 fl6.flowi6_proto = IPPROTO_TCP; 922 if (rt6_need_strict(&fl6.daddr) && !oif) 923 fl6.flowi6_oif = tcp_v6_iif(skb); 924 else { 925 if (!oif && netif_index_is_l3_master(net, skb->skb_iif)) 926 oif = skb->skb_iif; 927 928 fl6.flowi6_oif = oif; 929 } 930 931 if (sk) { 932 /* unconstify the socket only to attach it to buff with care. 
*/ 933 skb_set_owner_edemux(buff, (struct sock *)sk); 934 psp_reply_set_decrypted(sk, buff); 935 936 if (sk->sk_state == TCP_TIME_WAIT) 937 mark = inet_twsk(sk)->tw_mark; 938 else 939 mark = READ_ONCE(sk->sk_mark); 940 skb_set_delivery_time(buff, tcp_transmit_time(sk), SKB_CLOCK_MONOTONIC); 941 } 942 if (txhash) { 943 /* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */ 944 skb_set_hash(buff, txhash, PKT_HASH_TYPE_L4); 945 } 946 fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark; 947 fl6.fl6_dport = t1->dest; 948 fl6.fl6_sport = t1->source; 949 fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL); 950 security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6)); 951 952 /* Pass a socket to ip6_dst_lookup either it is for RST 953 * Underlying function will use this to retrieve the network 954 * namespace 955 */ 956 if (sk && sk->sk_state != TCP_TIME_WAIT) 957 dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /*sk's xfrm_policy can be referred*/ 958 else 959 dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL); 960 if (!IS_ERR(dst)) { 961 skb_dst_set(buff, dst); 962 ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, 963 tclass, priority); 964 TCP_INC_STATS(net, TCP_MIB_OUTSEGS); 965 if (rst) 966 TCP_INC_STATS(net, TCP_MIB_OUTRSTS); 967 return; 968 } 969 970 kfree_skb(buff); 971 } 972 973 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb, 974 enum sk_rst_reason reason) 975 { 976 const struct tcphdr *th = tcp_hdr(skb); 977 struct ipv6hdr *ipv6h = ipv6_hdr(skb); 978 const __u8 *md5_hash_location = NULL; 979 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) 980 bool allocated_traffic_key = false; 981 #endif 982 const struct tcp_ao_hdr *aoh; 983 struct tcp_key key = {}; 984 u32 seq = 0, ack_seq = 0; 985 __be32 label = 0; 986 u32 priority = 0; 987 struct net *net; 988 u32 txhash = 0; 989 int oif = 0; 990 #ifdef CONFIG_TCP_MD5SIG 991 unsigned char newhash[16]; 992 struct sock *sk1 = NULL; 993 #endif 994 995 if 
(th->rst) 996 return; 997 998 /* If sk not NULL, it means we did a successful lookup and incoming 999 * route had to be correct. prequeue might have dropped our dst. 1000 */ 1001 if (!sk && !ipv6_unicast_destination(skb)) 1002 return; 1003 1004 net = sk ? sock_net(sk) : skb_dst_dev_net_rcu(skb); 1005 /* Invalid TCP option size or twice included auth */ 1006 if (tcp_parse_auth_options(th, &md5_hash_location, &aoh)) 1007 return; 1008 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) 1009 rcu_read_lock(); 1010 #endif 1011 #ifdef CONFIG_TCP_MD5SIG 1012 if (sk && sk_fullsock(sk)) { 1013 int l3index; 1014 1015 /* sdif set, means packet ingressed via a device 1016 * in an L3 domain and inet_iif is set to it. 1017 */ 1018 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; 1019 key.md5_key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index); 1020 if (key.md5_key) 1021 key.type = TCP_KEY_MD5; 1022 } else if (md5_hash_location) { 1023 int dif = tcp_v6_iif_l3_slave(skb); 1024 int sdif = tcp_v6_sdif(skb); 1025 int l3index; 1026 1027 /* 1028 * active side is lost. Try to find listening socket through 1029 * source port, and then find md5 key through listening socket. 1030 * we are not loose security here: 1031 * Incoming packet is checked with md5 hash with finding key, 1032 * no RST generated if md5 hash doesn't match. 1033 */ 1034 sk1 = inet6_lookup_listener(net, NULL, 0, &ipv6h->saddr, th->source, 1035 &ipv6h->daddr, ntohs(th->source), 1036 dif, sdif); 1037 if (!sk1) 1038 goto out; 1039 1040 /* sdif set, means packet ingressed via a device 1041 * in an L3 domain and dif is set to it. 1042 */ 1043 l3index = tcp_v6_sdif(skb) ? 
dif : 0; 1044 1045 key.md5_key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index); 1046 if (!key.md5_key) 1047 goto out; 1048 key.type = TCP_KEY_MD5; 1049 1050 tcp_v6_md5_hash_skb(newhash, key.md5_key, NULL, skb); 1051 if (memcmp(md5_hash_location, newhash, 16) != 0) 1052 goto out; 1053 } 1054 #endif 1055 1056 if (th->ack) 1057 seq = ntohl(th->ack_seq); 1058 else 1059 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len - 1060 (th->doff << 2); 1061 1062 #ifdef CONFIG_TCP_AO 1063 if (aoh) { 1064 int l3index; 1065 1066 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; 1067 if (tcp_ao_prepare_reset(sk, skb, aoh, l3index, seq, 1068 &key.ao_key, &key.traffic_key, 1069 &allocated_traffic_key, 1070 &key.rcv_next, &key.sne)) 1071 goto out; 1072 key.type = TCP_KEY_AO; 1073 } 1074 #endif 1075 1076 if (sk) { 1077 oif = sk->sk_bound_dev_if; 1078 if (sk_fullsock(sk)) { 1079 if (inet6_test_bit(REPFLOW, sk)) 1080 label = ip6_flowlabel(ipv6h); 1081 priority = READ_ONCE(sk->sk_priority); 1082 txhash = sk->sk_txhash; 1083 } 1084 if (sk->sk_state == TCP_TIME_WAIT) { 1085 label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel); 1086 priority = inet_twsk(sk)->tw_priority; 1087 txhash = inet_twsk(sk)->tw_txhash; 1088 } 1089 } else { 1090 if (READ_ONCE(net->ipv6.sysctl.flowlabel_reflect) & 1091 FLOWLABEL_REFLECT_TCP_RESET) 1092 label = ip6_flowlabel(ipv6h); 1093 } 1094 1095 trace_tcp_send_reset(sk, skb, reason); 1096 1097 tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, 1, 1098 ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK, 1099 label, priority, txhash, 1100 &key); 1101 1102 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) 1103 out: 1104 if (allocated_traffic_key) 1105 kfree(key.traffic_key); 1106 rcu_read_unlock(); 1107 #endif 1108 } 1109 1110 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq, 1111 u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, 1112 struct tcp_key *key, u8 tclass, 1113 __be32 label, u32 priority, u32 txhash) 1114 { 1115 
	tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, 0,
			     tclass, label, priority, txhash, key);
}

/* Send the ACK owed by a TIME_WAIT socket, signed with the TCP-AO or MD5
 * key attached to the timewait sock when one is configured.
 * Consumes the caller's reference on the timewait sock (inet_twsk_put()).
 */
static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb,
				enum tcp_tw_status tw_status)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
	u8 tclass = tw->tw_tclass;
	struct tcp_key key = {};

	/* Out-of-window ACKs must not carry ECN bits in the TOS field */
	if (tw_status == TCP_TW_ACK_OOW)
		tclass &= ~INET_ECN_MASK;
#ifdef CONFIG_TCP_AO
	struct tcp_ao_info *ao_info;

	if (static_branch_unlikely(&tcp_ao_needed.key)) {

		/* FIXME: the segment to-be-acked is not verified yet */
		ao_info = rcu_dereference(tcptw->ao_info);
		if (ao_info) {
			const struct tcp_ao_hdr *aoh;

			/* Invalid TCP option size or twice included auth */
			if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh))
				goto out;
			if (aoh)
				key.ao_key = tcp_ao_established_key(sk, ao_info,
								    aoh->rnext_keyid, -1);
		}
	}
	if (key.ao_key) {
		struct tcp_ao_key *rnext_key;

		key.traffic_key = snd_other_key(key.ao_key);
		/* rcv_next switches to our rcv_next */
		rnext_key = READ_ONCE(ao_info->rnext_key);
		key.rcv_next = rnext_key->rcvid;
		key.sne = READ_ONCE(ao_info->snd_sne);
		key.type = TCP_KEY_AO;
#else
	if (0) {
#endif
#ifdef CONFIG_TCP_MD5SIG
	} else if (static_branch_unlikely(&tcp_md5_needed.key)) {
		key.md5_key = tcp_twsk_md5_key(tcptw);
		if (key.md5_key)
			key.type = TCP_KEY_MD5;
#endif
	}

	tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt,
			READ_ONCE(tcptw->tw_rcv_nxt),
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcp_tw_tsval(tcptw),
			READ_ONCE(tcptw->tw_ts_recent), tw->tw_bound_dev_if,
			&key, tclass, cpu_to_be32(tw->tw_flowlabel),
			tw->tw_priority, tw->tw_txhash);

#ifdef CONFIG_TCP_AO
out:
#endif
	inet_twsk_put(tw);
}

/* ACK a segment on behalf of a connection that is still a request sock
 * (regular TCP_SYN_RECV, or TCP_SYN_RECV via Fast Open — see comment at
 * the tcp_v6_send_ack() call below).  When the request used TCP-AO, a
 * per-call traffic key is derived (kmalloc'd here, freed after sending);
 * otherwise an MD5 key is looked up if MD5 is in use.
 */
static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
{
	struct tcp_key key = {};

#ifdef CONFIG_TCP_AO
	if (static_branch_unlikely(&tcp_ao_needed.key) &&
	    tcp_rsk_used_ao(req)) {
		const struct in6_addr *addr = &ipv6_hdr(skb)->saddr;
		const struct tcp_ao_hdr *aoh;
		int l3index;

		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
		/* Invalid TCP option size or twice included auth */
		if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh))
			return;
		if (!aoh)
			return;
		key.ao_key = tcp_ao_do_lookup(sk, l3index,
					      (union tcp_ao_addr *)addr,
					      AF_INET6, aoh->rnext_keyid, -1);
		if (unlikely(!key.ao_key)) {
			/* Send ACK with any matching MKT for the peer */
			key.ao_key = tcp_ao_do_lookup(sk, l3index,
						      (union tcp_ao_addr *)addr,
						      AF_INET6, -1, -1);
			/* Matching key disappeared (user removed the key?)
			 * let the handshake timeout.
			 */
			if (!key.ao_key) {
				net_info_ratelimited("TCP-AO key for (%pI6, %d)->(%pI6, %d) suddenly disappeared, won't ACK new connection\n",
						     addr,
						     ntohs(tcp_hdr(skb)->source),
						     &ipv6_hdr(skb)->daddr,
						     ntohs(tcp_hdr(skb)->dest));
				return;
			}
		}
		key.traffic_key = kmalloc(tcp_ao_digest_size(key.ao_key), GFP_ATOMIC);
		if (!key.traffic_key)
			return;

		key.type = TCP_KEY_AO;
		key.rcv_next = aoh->keyid;
		tcp_v6_ao_calc_key_rsk(key.ao_key, key.traffic_key, req);
#else
	if (0) {
#endif
#ifdef CONFIG_TCP_MD5SIG
	} else if (static_branch_unlikely(&tcp_md5_needed.key)) {
		int l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;

		key.md5_key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr,
						   l3index);
		if (key.md5_key)
			key.type = TCP_KEY_MD5;
#endif
	}

	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
	 */
	tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
			tcp_rsk(req)->rcv_nxt,
			tcp_synack_window(req) >> inet_rsk(req)->rcv_wscale,
			tcp_rsk_tsval(tcp_rsk(req)),
			req->ts_recent, sk->sk_bound_dev_if,
			&key, ipv6_get_dsfield(ipv6_hdr(skb)) & ~INET_ECN_MASK,
			0,
			READ_ONCE(sk->sk_priority),
			READ_ONCE(tcp_rsk(req)->txhash));
	if (tcp_key_is_ao(&key))
		kfree(key.traffic_key);
}


/* With CONFIG_SYN_COOKIES, a non-SYN segment may complete a syncookie
 * handshake: cookie_v6_check() returns either a freshly created socket
 * or @sk unchanged.  Without syncookies this is a no-op.
 */
static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
	const struct tcphdr *th = tcp_hdr(skb);

	if (!th->syn)
		sk = cookie_v6_check(sk, skb);
#endif
	return sk;
}

/* Compute the MSS to encode in a syncookie and store the cookie sequence
 * number through @cookie.  Returns 0 when no cookie can be generated
 * (or when CONFIG_SYN_COOKIES is off).
 */
u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
			 struct tcphdr *th, u32 *cookie)
{
	u16 mss = 0;
#ifdef CONFIG_SYN_COOKIES
	mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
				    &tcp_request_sock_ipv6_ops, sk, th);
	if (mss) {
		*cookie = __cookie_v6_init_sequence(iph, th, &mss);
		tcp_synq_overflow(sk);
	}
#endif
	return mss;
}

/* conn_request handler: IPv4 (v6-mapped) SYNs are delegated to the IPv4
 * path; non-unicast destinations and v4-mapped source addresses are
 * rejected without sending a reset.
 */
static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))
		goto drop;

	if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
		__IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
		return 0;
	}

	return tcp_conn_request(&tcp6_request_sock_ops,
				&tcp_request_sock_ipv6_ops, sk, skb);

drop:
	tcp_listendrop(sk);
	return 0; /* don't send reset */
}

static void tcp_v6_restore_cb(struct sk_buff *skb)
{
	/* We need to move header back to the beginning if xfrm6_policy_check()
	 * and tcp_v6_fill_cb() are going to be called again.
	 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
	 */
	memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
		sizeof(struct inet6_skb_parm));
}

/* Build the child socket for an accepted connection request.  An IPv4
 * (v6-mapped) SYN is delegated to tcp_v4_syn_recv_sock() and the result
 * is then patched to use the v6-mapped ops tables.
 */
static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
					 struct request_sock *req,
					 struct dst_entry *dst,
					 struct request_sock *req_unhash,
					 bool *own_req)
{
	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct inet_request_sock *ireq;
	struct ipv6_txoptions *opt;
	struct inet_sock *newinet;
	bool found_dup_sk = false;
	struct ipv6_pinfo *newnp;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
	int l3index;
#endif
	struct flowi6 fl6;

	if (skb->protocol == htons(ETH_P_IP)) {
		/*
		 * v6 mapped
		 */

		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
					     req_unhash, own_req);

		if (!newsk)
			return NULL;

		newinet = inet_sk(newsk);
		newinet->pinet6 = tcp_inet6_sk(newsk);
		newinet->ipv6_fl_list = NULL;

		newnp = tcp_inet6_sk(newsk);
		newtp = tcp_sk(newsk);

		memcpy(newnp, np, sizeof(struct ipv6_pinfo));

		newnp->saddr = newsk->sk_v6_rcv_saddr;

		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
		if (sk_is_mptcp(newsk))
			mptcpv6_handle_mapped(newsk, true);
		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
		newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		newnp->ipv6_mc_list = NULL;
		newnp->ipv6_ac_list = NULL;
		newnp->pktoptions  = NULL;
		newnp->opt	   = NULL;
		newnp->mcast_oif   = inet_iif(skb);
		newnp->mcast_hops  = ip_hdr(skb)->ttl;
		newnp->rcv_flowinfo = 0;
		if (inet6_test_bit(REPFLOW, sk))
			newnp->flow_label = 0;

		/*
		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
		 * here,
tcp_create_openreq_child now does this for us, see the comment in
		 * that function for the gory details. -acme
		 */

		/* It is tricky place. Until this moment IPv4 tcp
		   worked with IPv6 icsk.icsk_af_ops.
		   Sync it now.
		 */
		tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);

		return newsk;
	}

	ireq = inet_rsk(req);

	if (sk_acceptq_is_full(sk))
		goto exit_overflow;

	dst = inet6_csk_route_req(sk, dst, &fl6, req, IPPROTO_TCP);
	if (!dst)
		goto exit;

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto exit_nonewsk;

	/*
	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
	 * count here, tcp_create_openreq_child now does this for us, see the
	 * comment in that function for the gory details. -acme
	 */

	newsk->sk_gso_type = SKB_GSO_TCPV6;
	inet6_sk_rx_dst_set(newsk, skb);

	newinet = inet_sk(newsk);
	newinet->cork.fl.u.ip6 = fl6;
	newinet->pinet6 = tcp_inet6_sk(newsk);
	newinet->ipv6_fl_list = NULL;
	newinet->inet_opt = NULL;

	newtp = tcp_sk(newsk);
	newnp = tcp_inet6_sk(newsk);

	memcpy(newnp, np, sizeof(struct ipv6_pinfo));

	ip6_dst_store(newsk, dst, false, false);

	newnp->saddr = ireq->ir_v6_loc_addr;

	/* Now IPv6 options...

	   First: no IPv4 options.
	 */
	newnp->ipv6_mc_list = NULL;
	newnp->ipv6_ac_list = NULL;

	/* Clone RX bits */
	newnp->rxopt.all = np->rxopt.all;

	newnp->pktoptions = NULL;
	newnp->opt	  = NULL;
	newnp->mcast_oif  = tcp_v6_iif(skb);
	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
	newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
	if (inet6_test_bit(REPFLOW, sk))
		newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));

	/* Set ToS of the new socket based upon the value of incoming SYN.
	 * ECT bits are set later in tcp_init_transfer().
	 */
	if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
		newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;

	/* Clone native IPv6 options from listening socket (if any)

	   Yes, keeping reference count would be much more clever,
	   but we make one more one thing there: reattach optmem
	   to newsk.
	 */
	opt = ireq->ipv6_opt;
	if (!opt)
		opt = rcu_dereference(np->opt);
	if (opt) {
		opt = ipv6_dup_options(newsk, opt);
		RCU_INIT_POINTER(newnp->opt, opt);
	}
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (opt)
		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
						    opt->opt_flen;

	tcp_ca_openreq_child(newsk, dst);

	tcp_sync_mss(newsk, dst6_mtu(dst));
	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));

	tcp_initialize_rcv_mss(newsk);

#ifdef CONFIG_TCP_MD5SIG
	l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);

	if (!tcp_rsk_used_ao(req)) {
		/* Copy over the MD5 key from the original socket */
		key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
		if (key) {
			const union tcp_md5_addr *addr;

			addr = (union tcp_md5_addr *)&newsk->sk_v6_daddr;
			if (tcp_md5_key_copy(newsk, addr, AF_INET6, 128, l3index, key))
				goto put_and_exit;
		}
	}
#endif
#ifdef CONFIG_TCP_AO
	/* Copy over tcp_ao_info if any */
	if (tcp_ao_copy_all_matching(sk, newsk, req, skb, AF_INET6))
		goto put_and_exit; /* OOM */
#endif

	if (__inet_inherit_port(sk, newsk) < 0)
		goto put_and_exit;
	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
				       &found_dup_sk);
	if (*own_req) {
		tcp_move_syn(newtp, req);

		/* Clone pktoptions received with SYN, if we own the req */
		if (ireq->pktopts) {
			newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk);
			consume_skb(ireq->pktopts);
			ireq->pktopts = NULL;
			if (newnp->pktoptions)
				tcp_v6_restore_cb(newnp->pktoptions);
		}
	} else {
		if (!req_unhash && found_dup_sk) {
			/* This code path should only be executed in the
			 * syncookie case only
			 */
			bh_unlock_sock(newsk);
			sock_put(newsk);
			newsk = NULL;
		}
	}

	return newsk;

	/* Error unwind: account the failure, release the route if taken,
	 * then bump listener drop statistics.
	 */
exit_overflow:
	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
exit_nonewsk:
	dst_release(dst);
exit:
	tcp_listendrop(sk);
	return NULL;
put_and_exit:
	inet_csk_prepare_forced_close(newsk);
	tcp_done(newsk);
	goto exit;
}

INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
							   u32));
/* The socket must have its spinlock held when we get
 * here, unless it is a TCP_LISTEN socket.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
INDIRECT_CALLABLE_SCOPE
int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct sk_buff *opt_skb = NULL;
	enum skb_drop_reason reason;
	struct tcp_sock *tp;

	/* Imagine: socket is IPv6. IPv4 packet arrives,
	   goes to IPv4 receive handler and backlogged.
	   From backlog it always goes here. Kerboom...
	   Fortunately, tcp_rcv_established and rcv_established
	   handle them correctly, but it is not case with
	   tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
	 */

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_do_rcv(sk, skb);

	/* Non-zero means the segment fails the PSP rx policy:
	 * count it as an input error and drop.
	 */
	reason = psp_sk_rx_policy_check(sk, skb);
	if (reason)
		goto err_discard;

	/*
	 * socket locking is here for SMP purposes as backlog rcv
	 * is currently called with bh processing disabled.
	 */

	/* Do Stevens' IPV6_PKTOPTIONS

	   Yes, guys, it is the only place in our code, where we
	   may make it not affecting IPv4.
	   The rest of code is protocol independent,
	   and I do not like idea to uglify IPv4.

	   Actually, all the idea behind IPV6_PKTOPTIONS
	   looks not very well thought. For now we latch
	   options, received in the last packet, enqueued
	   by tcp. Feel free to propose better solution.
					       --ANK (980728)
	 */
	if (np->rxopt.all && sk->sk_state != TCP_LISTEN)
		opt_skb = skb_clone_and_charge_r(skb, sk);

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		struct dst_entry *dst;

		dst = rcu_dereference_protected(sk->sk_rx_dst,
						lockdep_sock_is_held(sk));

		sock_rps_save_rxhash(sk, skb);
		sk_mark_napi_id(sk, skb);
		if (dst) {
			/* Invalidate the cached rx dst if the input interface
			 * changed or the dst failed its validity check.
			 */
			if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
			    INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
					    dst, sk->sk_rx_dst_cookie) == NULL) {
				RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
				dst_release(dst);
			}
		}

		tcp_rcv_established(sk, skb);
		if (opt_skb)
			goto ipv6_pktoptions;
		return 0;
	}

	if (tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v6_cookie_check(sk, skb);

		if (nsk != sk) {
			if (nsk) {
				reason = tcp_child_process(sk, nsk, skb);
				if (reason)
					goto reset;
			}
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb);

	reason = tcp_rcv_state_process(sk, skb);
	if (reason)
		goto reset;
	if (opt_skb)
		goto ipv6_pktoptions;
	return 0;

reset:
	tcp_v6_send_reset(sk, skb, sk_rst_convert_drop_reason(reason));
discard:
	if (opt_skb)
		__kfree_skb(opt_skb);
	sk_skb_reason_drop(sk, skb, reason);
	return 0;
csum_err:
	reason = SKB_DROP_REASON_TCP_CSUM;
	trace_tcp_bad_csum(skb);
	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
err_discard:
	/* csum_err falls through to here, so checksum failures bump both
	 * TCP_MIB_CSUMERRORS and TCP_MIB_INERRS.
	 */
	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
	goto discard;


ipv6_pktoptions:
	/* Do you ask, what is it?

	   1. skb was enqueued by tcp.
	   2. skb is added to tail of read queue, rather than out of order.
	   3. socket is not in passive state.
	   4. Finally, it really contains options, which user wants to receive.
	 */
	tp = tcp_sk(sk);
	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
			WRITE_ONCE(np->mcast_oif, tcp_v6_iif(opt_skb));
		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
			WRITE_ONCE(np->mcast_hops,
				   ipv6_hdr(opt_skb)->hop_limit);
		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
		if (inet6_test_bit(REPFLOW, sk))
			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
		if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
			/* Latch the clone as the socket's pktoptions and free
			 * whatever was latched before.
			 */
			tcp_v6_restore_cb(opt_skb);
			opt_skb = xchg(&np->pktoptions, opt_skb);
		} else {
			__kfree_skb(opt_skb);
			opt_skb = xchg(&np->pktoptions, NULL);
		}
	}

	consume_skb(opt_skb);
	return 0;
}

/* Copy IP6CB into TCP_SKB_CB and precompute the sequence/flag fields the
 * rest of the TCP input path relies on (see the aliasing note below).
 */
static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
			   const struct tcphdr *th)
{
	/* This is tricky: we move IP6CB at its correct location into
	 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
	 * _decode_session6() uses IP6CB().
	 * barrier() makes sure compiler won't play aliasing games.
	 */
	memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
		sizeof(struct inet6_skb_parm));
	barrier();

	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff*4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->tcp_flags = tcp_flags_ntohs(th);
	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
	TCP_SKB_CB(skb)->sacked = 0;
	TCP_SKB_CB(skb)->has_rxtstamp =
			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
}

/* Main IPv6 TCP receive routine.  Presumably the protocol handler entry
 * point for IPPROTO_TCP — the registration site is not in this chunk,
 * TODO confirm.  Validates the header and checksum, looks the socket up,
 * and dispatches on its state (NEW_SYN_RECV, TIME_WAIT, or full socket).
 */
INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
{
	struct net *net = dev_net_rcu(skb->dev);
	enum skb_drop_reason drop_reason;
	enum tcp_tw_status tw_status;
	int sdif = inet6_sdif(skb);
	int dif = inet6_iif(skb);
	const struct tcphdr *th;
	const struct ipv6hdr *hdr;
	struct sock *sk = NULL;
	bool refcounted;
	int ret;
	u32 isn;

	drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/*
	 * Count it even if it's bad.
	 */
	__TCP_INC_STATS(net, TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = (const struct tcphdr *)skb->data;

	if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) {
		drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
		goto bad_packet;
	}
	if (!pskb_may_pull(skb, th->doff*4))
		goto discard_it;

	if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
		goto csum_error;

	/* re-read after pskb_may_pull() may have reallocated the head */
	th = (const struct tcphdr *)skb->data;
	hdr = ipv6_hdr(skb);

lookup:
	sk = __inet6_lookup_skb(skb, __tcp_hdrlen(th),
				th->source, th->dest, inet6_iif(skb), sdif,
				&refcounted);
	if (!sk)
		goto no_tcp_socket;

	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		struct request_sock *req = inet_reqsk(sk);
		bool req_stolen = false;
		struct sock *nsk;

		sk = req->rsk_listener;
		if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
			drop_reason = SKB_DROP_REASON_XFRM_POLICY;
		else
			drop_reason = tcp_inbound_hash(sk, req, skb,
						       &hdr->saddr, &hdr->daddr,
						       AF_INET6, dif, sdif);
		if (drop_reason) {
			sk_drops_skbadd(sk, skb);
			reqsk_put(req);
			goto discard_it;
		}
		if (tcp_checksum_complete(skb)) {
			reqsk_put(req);
			goto csum_error;
		}
		if (unlikely(sk->sk_state != TCP_LISTEN)) {
			nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
			if (!nsk) {
				inet_csk_reqsk_queue_drop_and_put(sk, req);
				goto lookup;
			}
			sk = nsk;
			/* reuseport_migrate_sock() has already held one sk_refcnt
			 * before returning.
			 */
		} else {
			sock_hold(sk);
		}
		refcounted = true;
		nsk = NULL;
		if (!tcp_filter(sk, skb, &drop_reason)) {
			th = (const struct tcphdr *)skb->data;
			hdr = ipv6_hdr(skb);
			tcp_v6_fill_cb(skb, hdr, th);
			nsk = tcp_check_req(sk, skb, req, false, &req_stolen,
					    &drop_reason);
		}
		if (!nsk) {
			reqsk_put(req);
			if (req_stolen) {
				/* Another cpu got exclusive access to req
				 * and created a full blown socket.
				 * Try to feed this packet to this socket
				 * instead of discarding it.
				 */
				tcp_v6_restore_cb(skb);
				sock_put(sk);
				goto lookup;
			}
			goto discard_and_relse;
		}
		nf_reset_ct(skb);
		if (nsk == sk) {
			reqsk_put(req);
			tcp_v6_restore_cb(skb);
		} else {
			drop_reason = tcp_child_process(sk, nsk, skb);
			if (drop_reason) {
				enum sk_rst_reason rst_reason;

				rst_reason = sk_rst_convert_drop_reason(drop_reason);
				tcp_v6_send_reset(nsk, skb, rst_reason);
				goto discard_and_relse;
			}
			sock_put(sk);
			return 0;
		}
	}

process:
	if (static_branch_unlikely(&ip6_min_hopcount)) {
		/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
		if (unlikely(hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount))) {
			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
			drop_reason = SKB_DROP_REASON_TCP_MINTTL;
			goto discard_and_relse;
		}
	}

	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
		drop_reason = SKB_DROP_REASON_XFRM_POLICY;
		goto discard_and_relse;
	}

	drop_reason = tcp_inbound_hash(sk, NULL, skb, &hdr->saddr, &hdr->daddr,
				       AF_INET6, dif, sdif);
	if (drop_reason)
		goto discard_and_relse;

	nf_reset_ct(skb);

	if (tcp_filter(sk, skb, &drop_reason))
		goto discard_and_relse;

	th = (const struct tcphdr *)skb->data;
	hdr = ipv6_hdr(skb);
	tcp_v6_fill_cb(skb, hdr, th);
	skb->dev = NULL;

	if (sk->sk_state == TCP_LISTEN) {
		ret = tcp_v6_do_rcv(sk, skb);
		goto put_and_return;
	}

	sk_incoming_cpu_update(sk);

	/* Process under the socket spinlock, or backlog the segment if a
	 * user context currently owns the socket.
	 */
	bh_lock_sock_nested(sk);
	tcp_segs_in(tcp_sk(sk), skb);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
		ret = tcp_v6_do_rcv(sk, skb);
	} else {
		if (tcp_add_backlog(sk, skb, &drop_reason))
			goto discard_and_relse;
	}
	bh_unlock_sock(sk);
put_and_return:
	if (refcounted)
		sock_put(sk);
	return ret ? -1 : 0;

no_tcp_socket:
	drop_reason = SKB_DROP_REASON_NO_SOCKET;
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	tcp_v6_fill_cb(skb, hdr, th);

	if (tcp_checksum_complete(skb)) {
csum_error:
		drop_reason = SKB_DROP_REASON_TCP_CSUM;
		trace_tcp_bad_csum(skb);
		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
bad_packet:
		__TCP_INC_STATS(net, TCP_MIB_INERRS);
	} else {
		tcp_v6_send_reset(NULL, skb, sk_rst_convert_drop_reason(drop_reason));
	}

discard_it:
	SKB_DR_OR(drop_reason, NOT_SPECIFIED);
	sk_skb_reason_drop(sk, skb, drop_reason);
	return 0;

discard_and_relse:
	sk_drops_skbadd(sk, skb);
	if (refcounted)
		sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		drop_reason = SKB_DROP_REASON_XFRM_POLICY;
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	tcp_v6_fill_cb(skb, hdr, th);

	if (tcp_checksum_complete(skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto csum_error;
	}

	tw_status = tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn,
					       &drop_reason);
	switch (tw_status) {
	case TCP_TW_SYN:
	{
		/* A SYN aimed at a TIME_WAIT socket: if a matching listener
		 * exists, retire the timewait sock and process the SYN as a
		 * fresh connection attempt.
		 */
		struct sock *sk2;

		sk2 = inet6_lookup_listener(net, skb, __tcp_hdrlen(th),
					    &ipv6_hdr(skb)->saddr, th->source,
					    &ipv6_hdr(skb)->daddr,
					    ntohs(th->dest),
					    tcp_v6_iif_l3_slave(skb),
					    sdif);
		if (sk2) {
			struct inet_timewait_sock *tw = inet_twsk(sk);
			inet_twsk_deschedule_put(tw);
			sk = sk2;
			tcp_v6_restore_cb(skb);
			refcounted = false;
			__this_cpu_write(tcp_tw_isn, isn);
			goto process;
		}

		drop_reason = psp_twsk_rx_policy_check(inet_twsk(sk), skb);
		if (drop_reason)
			break;
	}
		/* to ACK */
		fallthrough;
	case TCP_TW_ACK:
	case TCP_TW_ACK_OOW:
		tcp_v6_timewait_ack(sk, skb, tw_status);
		break;
	case TCP_TW_RST:
		tcp_v6_send_reset(sk, skb, SK_RST_REASON_TCP_TIMEWAIT_SOCKET);
		inet_twsk_deschedule_put(inet_twsk(sk));
		goto discard_it;
	case TCP_TW_SUCCESS:
		;
	}
	goto discard_it;
}

/* Early demux: before the full receive path runs, try to find the
 * established socket for this segment and attach it (plus its cached rx
 * dst, when still valid and the incoming interface matches) to the skb.
 */
void tcp_v6_early_demux(struct sk_buff *skb)
{
	struct net *net = dev_net_rcu(skb->dev);
	const struct ipv6hdr *hdr;
	const struct tcphdr *th;
	struct sock *sk;

	if (skb->pkt_type != PACKET_HOST)
		return;

	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
		return;

	hdr = ipv6_hdr(skb);
	th = tcp_hdr(skb);

	if (th->doff < sizeof(struct tcphdr) / 4)
		return;

	/* Note : We use inet6_iif() here, not tcp_v6_iif() */
	sk = __inet6_lookup_established(net, &hdr->saddr, th->source,
					&hdr->daddr, ntohs(th->dest),
					inet6_iif(skb), inet6_sdif(skb));
	if (sk) {
		skb->sk = sk;
		skb->destructor = sock_edemux;
		if (sk_fullsock(sk)) {
			struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);

			if (dst)
				dst = dst_check(dst, sk->sk_rx_dst_cookie);
			if (dst &&
			    sk->sk_rx_dst_ifindex == skb->skb_iif)
				skb_dst_set_noref(skb, dst);
		}
	}
}

static struct timewait_sock_ops tcp6_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
};

INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff
*skb)
{
	/* Pseudo-header checksum using the socket's bound v6 addresses */
	__tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr);
}

/* AF-specific hooks used by a native (non v6-mapped) IPv6 TCP socket;
 * installed by tcp_v6_init_sock().
 */
const struct inet_connection_sock_af_ops ipv6_specific = {
	.queue_xmit	   = inet6_csk_xmit,
	.send_check	   = tcp_v6_send_check,
	.rebuild_header	   = inet6_sk_rebuild_header,
	.sk_rx_dst_set	   = inet6_sk_rx_dst_set,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.net_header_len	   = sizeof(struct ipv6hdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.mtu_reduced	   = tcp_v6_mtu_reduced,
};

#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
#ifdef CONFIG_TCP_MD5SIG
	.md5_lookup	=	tcp_v6_md5_lookup,
	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
	.md5_parse	=	tcp_v6_parse_md5_keys,
#endif
#ifdef CONFIG_TCP_AO
	.ao_lookup	=	tcp_v6_ao_lookup,
	.calc_ao_hash	=	tcp_v6_ao_hash_skb,
	.ao_parse	=	tcp_v6_parse_ao,
	.ao_calc_key_sk	=	tcp_v6_ao_calc_key_sk,
#endif
};
#endif

/*
 *	TCP over IPv4 via INET6 API
 */
static const struct inet_connection_sock_af_ops ipv6_mapped = {
	.queue_xmit	   = ip_queue_xmit,
	.send_check	   = tcp_v4_send_check,
	.rebuild_header	   = inet_sk_rebuild_header,
	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.mtu_reduced	   = tcp_v4_mtu_reduced,
};

#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
#ifdef CONFIG_TCP_MD5SIG
	.md5_lookup	=	tcp_v4_md5_lookup,
	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
	.md5_parse	=	tcp_v6_parse_md5_keys,
#endif
#ifdef CONFIG_TCP_AO
	.ao_lookup	=	tcp_v6_ao_lookup,
	.calc_ao_hash	=	tcp_v4_ao_hash_skb,
	.ao_parse	=	tcp_v6_parse_ao,
	.ao_calc_key_sk	=	tcp_v4_ao_calc_key_sk,
#endif
};

/* sk_destruct: tear down MD5/AO state before the generic inet6 destructor */
static void tcp6_destruct_sock(struct sock *sk)
{
	tcp_md5_destruct_sock(sk);
	tcp_ao_destroy_sock(sk, false);
	inet6_sock_destruct(sk);
}
#endif

/* NOTE: A lot of things set to zero explicitly by call to
 *	 sk_alloc() so need not be done here.
 */
static int tcp_v6_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	tcp_init_sock(sk);

	icsk->icsk_af_ops = &ipv6_specific;

#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
	sk->sk_destruct = tcp6_destruct_sock;
#endif

	return 0;
}

#ifdef CONFIG_PROC_FS
/* Proc filesystem TCPv6 sock list dumping. */

/* Render one request (SYN_RECV) socket as a /proc/net/tcp6 row */
static void get_openreq6(struct seq_file *seq,
			 const struct request_sock *req, int i)
{
	long ttd = req->rsk_timer.expires - jiffies;
	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;

	if (ttd < 0)
		ttd = 0;

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3],
		   inet_rsk(req)->ir_num,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3],
		   ntohs(inet_rsk(req)->ir_rmt_port),
		   TCP_SYN_RECV,
		   0, 0, /* could print option size, but that is af dependent. */
		   1,   /* timers active (only the expire timer) */
		   jiffies_to_clock_t(ttd),
		   req->num_timeout,
		   from_kuid_munged(seq_user_ns(seq),
				    sk_uid(req->rsk_listener)),
		   0,  /* non standard timer */
		   0, /* open_requests have no inode */
		   0, req);
}

/* Render one full socket as a /proc/net/tcp6 row.  timer_active encodes
 * which timer is pending: 1 retransmit/REO/loss-probe, 4 zero-window
 * probe, 2 keepalive, 0 none.
 */
static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int timer_active;
	unsigned long timer_expires;
	const struct inet_sock *inet = inet_sk(sp);
	const struct tcp_sock *tp = tcp_sk(sp);
	const struct inet_connection_sock *icsk = inet_csk(sp);
	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
	u8 icsk_pending;
	int rx_queue;
	int state;

	dest  = &sp->sk_v6_daddr;
	src   = &sp->sk_v6_rcv_saddr;
	destp = ntohs(inet->inet_dport);
	srcp  = ntohs(inet->inet_sport);

	icsk_pending = smp_load_acquire(&icsk->icsk_pending);
	if (icsk_pending == ICSK_TIME_RETRANS ||
	    icsk_pending == ICSK_TIME_REO_TIMEOUT ||
	    icsk_pending == ICSK_TIME_LOSS_PROBE) {
		timer_active	= 1;
		timer_expires	= tcp_timeout_expires(sp);
	} else if (icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= tcp_timeout_expires(sp);
	} else if (timer_pending(&icsk->icsk_keepalive_timer)) {
		timer_active	= 2;
		timer_expires	= icsk->icsk_keepalive_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires = jiffies;
	}

	state = inet_sk_state_load(sp);
	if (state == TCP_LISTEN)
		rx_queue = READ_ONCE(sp->sk_ack_backlog);
	else
		/* Because we don't lock the socket,
		 * we might find a transient negative value.
		 */
		rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
				      READ_ONCE(tp->copied_seq), 0);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   state,
		   READ_ONCE(tp->write_seq) - tp->snd_una,
		   rx_queue,
		   timer_active,
		   jiffies_delta_to_clock_t(timer_expires - jiffies),
		   READ_ONCE(icsk->icsk_retransmits),
		   from_kuid_munged(seq_user_ns(seq), sk_uid(sp)),
		   READ_ONCE(icsk->icsk_probes_out),
		   sock_i_ino(sp),
		   refcount_read(&sp->sk_refcnt), sp,
		   jiffies_to_clock_t(icsk->icsk_rto),
		   jiffies_to_clock_t(icsk->icsk_ack.ato),
		   (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
		   tcp_snd_cwnd(tp),
		   state == TCP_LISTEN ?
			fastopenq->max_qlen :
			(tcp_in_initial_slowstart(tp) ?
-1 : tp->snd_ssthresh) 2215 ); 2216 } 2217 2218 static void get_timewait6_sock(struct seq_file *seq, 2219 struct inet_timewait_sock *tw, int i) 2220 { 2221 long delta = tw->tw_timer.expires - jiffies; 2222 const struct in6_addr *dest, *src; 2223 __u16 destp, srcp; 2224 2225 dest = &tw->tw_v6_daddr; 2226 src = &tw->tw_v6_rcv_saddr; 2227 destp = ntohs(tw->tw_dport); 2228 srcp = ntohs(tw->tw_sport); 2229 2230 seq_printf(seq, 2231 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " 2232 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n", 2233 i, 2234 src->s6_addr32[0], src->s6_addr32[1], 2235 src->s6_addr32[2], src->s6_addr32[3], srcp, 2236 dest->s6_addr32[0], dest->s6_addr32[1], 2237 dest->s6_addr32[2], dest->s6_addr32[3], destp, 2238 READ_ONCE(tw->tw_substate), 0, 0, 2239 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0, 2240 refcount_read(&tw->tw_refcnt), tw); 2241 } 2242 2243 static int tcp6_seq_show(struct seq_file *seq, void *v) 2244 { 2245 struct tcp_iter_state *st; 2246 struct sock *sk = v; 2247 2248 if (v == SEQ_START_TOKEN) { 2249 seq_puts(seq, 2250 " sl " 2251 "local_address " 2252 "remote_address " 2253 "st tx_queue rx_queue tr tm->when retrnsmt" 2254 " uid timeout inode\n"); 2255 goto out; 2256 } 2257 st = seq->private; 2258 2259 if (sk->sk_state == TCP_TIME_WAIT) 2260 get_timewait6_sock(seq, v, st->num); 2261 else if (sk->sk_state == TCP_NEW_SYN_RECV) 2262 get_openreq6(seq, v, st->num); 2263 else 2264 get_tcp6_sock(seq, v, st->num); 2265 out: 2266 return 0; 2267 } 2268 2269 static const struct seq_operations tcp6_seq_ops = { 2270 .show = tcp6_seq_show, 2271 .start = tcp_seq_start, 2272 .next = tcp_seq_next, 2273 .stop = tcp_seq_stop, 2274 }; 2275 2276 static struct tcp_seq_afinfo tcp6_seq_afinfo = { 2277 .family = AF_INET6, 2278 }; 2279 2280 int __net_init tcp6_proc_init(struct net *net) 2281 { 2282 if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops, 2283 sizeof(struct tcp_iter_state), &tcp6_seq_afinfo)) 2284 return -ENOMEM; 2285 return 
0; 2286 } 2287 2288 void tcp6_proc_exit(struct net *net) 2289 { 2290 remove_proc_entry("tcp6", net->proc_net); 2291 } 2292 #endif 2293 2294 struct proto tcpv6_prot = { 2295 .name = "TCPv6", 2296 .owner = THIS_MODULE, 2297 .close = tcp_close, 2298 .pre_connect = tcp_v6_pre_connect, 2299 .connect = tcp_v6_connect, 2300 .disconnect = tcp_disconnect, 2301 .accept = inet_csk_accept, 2302 .ioctl = tcp_ioctl, 2303 .init = tcp_v6_init_sock, 2304 .destroy = tcp_v4_destroy_sock, 2305 .shutdown = tcp_shutdown, 2306 .setsockopt = tcp_setsockopt, 2307 .getsockopt = tcp_getsockopt, 2308 .bpf_bypass_getsockopt = tcp_bpf_bypass_getsockopt, 2309 .keepalive = tcp_set_keepalive, 2310 .recvmsg = tcp_recvmsg, 2311 .sendmsg = tcp_sendmsg, 2312 .splice_eof = tcp_splice_eof, 2313 .backlog_rcv = tcp_v6_do_rcv, 2314 .release_cb = tcp_release_cb, 2315 .hash = inet_hash, 2316 .unhash = inet_unhash, 2317 .get_port = inet_csk_get_port, 2318 .put_port = inet_put_port, 2319 #ifdef CONFIG_BPF_SYSCALL 2320 .psock_update_sk_prot = tcp_bpf_update_proto, 2321 #endif 2322 .enter_memory_pressure = tcp_enter_memory_pressure, 2323 .leave_memory_pressure = tcp_leave_memory_pressure, 2324 .stream_memory_free = tcp_stream_memory_free, 2325 .sockets_allocated = &tcp_sockets_allocated, 2326 2327 .memory_allocated = &net_aligned_data.tcp_memory_allocated, 2328 .per_cpu_fw_alloc = &tcp_memory_per_cpu_fw_alloc, 2329 2330 .memory_pressure = &tcp_memory_pressure, 2331 .sysctl_mem = sysctl_tcp_mem, 2332 .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem), 2333 .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem), 2334 .max_header = MAX_TCP_HEADER, 2335 .obj_size = sizeof(struct tcp6_sock), 2336 .freeptr_offset = offsetof(struct tcp6_sock, 2337 tcp.inet_conn.icsk_inet.sk.sk_freeptr), 2338 .ipv6_pinfo_offset = offsetof(struct tcp6_sock, inet6), 2339 .slab_flags = SLAB_TYPESAFE_BY_RCU, 2340 .twsk_prot = &tcp6_timewait_sock_ops, 2341 .rsk_prot = &tcp6_request_sock_ops, 2342 .h.hashinfo = NULL, 
2343 .no_autobind = true, 2344 .diag_destroy = tcp_abort, 2345 }; 2346 EXPORT_SYMBOL_GPL(tcpv6_prot); 2347 2348 2349 static struct inet_protosw tcpv6_protosw = { 2350 .type = SOCK_STREAM, 2351 .protocol = IPPROTO_TCP, 2352 .prot = &tcpv6_prot, 2353 .ops = &inet6_stream_ops, 2354 .flags = INET_PROTOSW_PERMANENT | 2355 INET_PROTOSW_ICSK, 2356 }; 2357 2358 static int __net_init tcpv6_net_init(struct net *net) 2359 { 2360 int res; 2361 2362 res = inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6, 2363 SOCK_RAW, IPPROTO_TCP, net); 2364 if (!res) 2365 net->ipv6.tcp_sk->sk_clockid = CLOCK_MONOTONIC; 2366 2367 return res; 2368 } 2369 2370 static void __net_exit tcpv6_net_exit(struct net *net) 2371 { 2372 inet_ctl_sock_destroy(net->ipv6.tcp_sk); 2373 } 2374 2375 static struct pernet_operations tcpv6_net_ops = { 2376 .init = tcpv6_net_init, 2377 .exit = tcpv6_net_exit, 2378 }; 2379 2380 int __init tcpv6_init(void) 2381 { 2382 int ret; 2383 2384 net_hotdata.tcpv6_protocol = (struct inet6_protocol) { 2385 .handler = tcp_v6_rcv, 2386 .err_handler = tcp_v6_err, 2387 .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_FINAL, 2388 }; 2389 ret = inet6_add_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP); 2390 if (ret) 2391 goto out; 2392 2393 /* register inet6 protocol */ 2394 ret = inet6_register_protosw(&tcpv6_protosw); 2395 if (ret) 2396 goto out_tcpv6_protocol; 2397 2398 ret = register_pernet_subsys(&tcpv6_net_ops); 2399 if (ret) 2400 goto out_tcpv6_protosw; 2401 2402 ret = mptcpv6_init(); 2403 if (ret) 2404 goto out_tcpv6_pernet_subsys; 2405 2406 out: 2407 return ret; 2408 2409 out_tcpv6_pernet_subsys: 2410 unregister_pernet_subsys(&tcpv6_net_ops); 2411 out_tcpv6_protosw: 2412 inet6_unregister_protosw(&tcpv6_protosw); 2413 out_tcpv6_protocol: 2414 inet6_del_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP); 2415 goto out; 2416 } 2417 2418 void tcpv6_exit(void) 2419 { 2420 unregister_pernet_subsys(&tcpv6_net_ops); 2421 inet6_unregister_protosw(&tcpv6_protosw); 2422 
inet6_del_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP); 2423 } 2424