1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * TCP over IPv6 4 * Linux INET6 implementation 5 * 6 * Authors: 7 * Pedro Roque <roque@di.fc.ul.pt> 8 * 9 * Based on: 10 * linux/net/ipv4/tcp.c 11 * linux/net/ipv4/tcp_input.c 12 * linux/net/ipv4/tcp_output.c 13 * 14 * Fixes: 15 * Hideaki YOSHIFUJI : sin6_scope_id support 16 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which 17 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind 18 * a single port at the same time. 19 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file. 20 */ 21 22 #include <linux/bottom_half.h> 23 #include <linux/module.h> 24 #include <linux/errno.h> 25 #include <linux/types.h> 26 #include <linux/socket.h> 27 #include <linux/sockios.h> 28 #include <linux/net.h> 29 #include <linux/jiffies.h> 30 #include <linux/in.h> 31 #include <linux/in6.h> 32 #include <linux/netdevice.h> 33 #include <linux/init.h> 34 #include <linux/jhash.h> 35 #include <linux/ipsec.h> 36 #include <linux/times.h> 37 #include <linux/slab.h> 38 #include <linux/uaccess.h> 39 #include <linux/ipv6.h> 40 #include <linux/icmpv6.h> 41 #include <linux/random.h> 42 #include <linux/indirect_call_wrapper.h> 43 44 #include <net/aligned_data.h> 45 #include <net/tcp.h> 46 #include <net/ndisc.h> 47 #include <net/inet6_hashtables.h> 48 #include <net/inet6_connection_sock.h> 49 #include <net/ipv6.h> 50 #include <net/transp_v6.h> 51 #include <net/addrconf.h> 52 #include <net/ip6_route.h> 53 #include <net/ip6_checksum.h> 54 #include <net/inet_ecn.h> 55 #include <net/protocol.h> 56 #include <net/xfrm.h> 57 #include <net/snmp.h> 58 #include <net/dsfield.h> 59 #include <net/timewait_sock.h> 60 #include <net/inet_common.h> 61 #include <net/secure_seq.h> 62 #include <net/hotdata.h> 63 #include <net/busy_poll.h> 64 #include <net/rstreason.h> 65 #include <net/psp.h> 66 67 #include <linux/proc_fs.h> 68 #include <linux/seq_file.h> 69 70 #include <crypto/md5.h> 71 72 #include <trace/events/tcp.h> 73 74 static void 
tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb,
		  enum sk_rst_reason reason);
static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req);

INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);

static const struct inet_connection_sock_af_ops ipv6_mapped;
const struct inet_connection_sock_af_ops ipv6_specific;
#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
#endif

/* Helper returning the inet6 address from a given tcp socket.
 * It can be used in TCP stack instead of inet6_sk(sk).
 * This avoids a dereference and allow compiler optimizations.
 * It is a specialized version of inet6_sk_generic().
 */
#define tcp_inet6_sk(sk) (&container_of_const(tcp_sk(sk), \
					      struct tcp6_sock, tcp)->inet6)

/* Cache the (already validated) input route on the socket so later
 * packets of this flow can skip a route lookup (early demux).
 * Takes a reference on dst only if dst_hold_safe() succeeds.
 */
static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	if (dst && dst_hold_safe(dst)) {
		rcu_assign_pointer(sk->sk_rx_dst, dst);
		sk->sk_rx_dst_ifindex = skb->skb_iif;
		sk->sk_rx_dst_cookie = rt6_get_cookie(dst_rt6_info(dst));
	}
}

/* Initial sequence number for a passive open, keyed on the incoming
 * segment's address/port 4-tuple.
 */
static u32 tcp_v6_init_seq(const struct sk_buff *skb)
{
	return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
				ipv6_hdr(skb)->saddr.s6_addr32,
				tcp_hdr(skb)->dest,
				tcp_hdr(skb)->source);
}

/* Per-flow timestamp offset, derived from the address pair. */
static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
{
	return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
				   ipv6_hdr(skb)->saddr.s6_addr32);
}

/* Called before tcp_v6_connect(): validates addr_len and gives cgroup
 * BPF programs a chance to inspect/rewrite the destination address.
 */
static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr_unsized *uaddr,
			      int addr_len)
{
	/* This check is replicated from tcp_v6_connect() and intended to
	 * prevent BPF program called below from accessing bytes that are out
	 * of the bound specified by user in addr_len.
	 */
	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	sock_owned_by_me(sk);

	return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, &addr_len);
}

/* Active open: validate the destination, resolve the route and source
 * address, bind a local port, then send the SYN via tcp_connect().
 * A v4-mapped destination flips the socket to the IPv4 af_ops and
 * delegates to tcp_v4_connect().  Returns 0 or a negative errno.
 */
static int tcp_v6_connect(struct sock *sk, struct sockaddr_unsized *uaddr,
			  int addr_len)
{
	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_timewait_death_row *tcp_death_row;
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct in6_addr *saddr = NULL, *final_p;
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6_txoptions *opt;
	struct dst_entry *dst;
	struct flowi6 *fl6;
	int addr_type;
	int err;

	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	if (usin->sin6_family != AF_INET6)
		return -EAFNOSUPPORT;

	fl6 = &inet_sk(sk)->cork.fl.u.ip6;
	memset(fl6, 0, sizeof(*fl6));

	if (inet6_test_bit(SNDFLOW, sk)) {
		fl6->flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
		IP6_ECN_flow_init(fl6->flowlabel);
		if (fl6->flowlabel & IPV6_FLOWLABEL_MASK) {
			struct ip6_flowlabel *flowlabel;
			flowlabel = fl6_sock_lookup(sk, fl6->flowlabel);
			if (IS_ERR(flowlabel))
				return -EINVAL;
			fl6_sock_release(flowlabel);
		}
	}

	/*
	 * connect() to INADDR_ANY means loopback (BSD'ism).
	 */

	if (ipv6_addr_any(&usin->sin6_addr)) {
		if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
			ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
					       &usin->sin6_addr);
		else
			usin->sin6_addr = in6addr_loopback;
	}

	addr_type = ipv6_addr_type(&usin->sin6_addr);

	if (addr_type & IPV6_ADDR_MULTICAST)
		return -ENETUNREACH;

	if (addr_type&IPV6_ADDR_LINKLOCAL) {
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			/* If interface is set while binding, indices
			 * must coincide.
			 */
			if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
				return -EINVAL;

			sk->sk_bound_dev_if = usin->sin6_scope_id;
		}

		/* Connect to link-local address requires an interface */
		if (!sk->sk_bound_dev_if)
			return -EINVAL;
	}

	/* Reconnecting to a different peer: stale per-destination
	 * timestamp state must not leak into the new connection.
	 */
	if (tp->rx_opt.ts_recent_stamp &&
	    !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		WRITE_ONCE(tp->write_seq, 0);
	}

	sk->sk_v6_daddr = usin->sin6_addr;
	np->flow_label = fl6->flowlabel;

	/*
	 *	TCP over IPv4
	 */

	if (addr_type & IPV6_ADDR_MAPPED) {
		u32 exthdrlen = icsk->icsk_ext_hdr_len;
		struct sockaddr_in sin;

		if (ipv6_only_sock(sk))
			return -ENETUNREACH;

		sin.sin_family = AF_INET;
		sin.sin_port = usin->sin6_port;
		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

		/* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
		WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped);
		if (sk_is_mptcp(sk))
			mptcpv6_handle_mapped(sk, true);
		sk->sk_backlog_rcv = tcp_v4_do_rcv;
#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		err = tcp_v4_connect(sk, (struct sockaddr_unsized *)&sin, sizeof(sin));

		if (err) {
			/* Roll the socket back to its IPv6 ops on failure. */
			icsk->icsk_ext_hdr_len = exthdrlen;
			/* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
			WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific);
			if (sk_is_mptcp(sk))
				mptcpv6_handle_mapped(sk, false);
			sk->sk_backlog_rcv = tcp_v6_do_rcv;
#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
			tp->af_specific = &tcp_sock_ipv6_specific;
#endif
			goto failure;
		}
		np->saddr = sk->sk_v6_rcv_saddr;

		return err;
	}

	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
		saddr = &sk->sk_v6_rcv_saddr;

	fl6->flowi6_proto = IPPROTO_TCP;
	fl6->daddr = sk->sk_v6_daddr;
	fl6->saddr = saddr ? *saddr : np->saddr;
	fl6->flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
	fl6->flowi6_oif = sk->sk_bound_dev_if;
	fl6->flowi6_mark = sk->sk_mark;
	fl6->fl6_dport = usin->sin6_port;
	fl6->fl6_sport = inet->inet_sport;
	if (IS_ENABLED(CONFIG_IP_ROUTE_MULTIPATH) && !fl6->fl6_sport)
		fl6->flowi6_flags = FLOWI_FLAG_ANY_SPORT;
	fl6->flowi6_uid = sk_uid(sk);

	opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
	final_p = fl6_update_dst(fl6, opt, &np->final);

	security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));

	dst = ip6_dst_lookup_flow(net, sk, fl6, final_p);
	if (IS_ERR(dst)) {
		err = PTR_ERR(dst);
		goto failure;
	}

	tp->tcp_usec_ts = dst_tcp_usec_ts(dst);
	tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;

	if (!saddr) {
		/* Source address was auto-selected by the route lookup. */
		saddr = &fl6->saddr;

		err = inet_bhash2_update_saddr(sk, saddr, AF_INET6);
		if (err)
			goto failure;
	}

	/* set the source address */
	np->saddr = *saddr;
	inet->inet_rcv_saddr = LOOPBACK4_IPV6;

	sk->sk_gso_type = SKB_GSO_TCPV6;
	ip6_dst_store(sk, dst, false, false);

	icsk->icsk_ext_hdr_len = psp_sk_overhead(sk);
	if (opt)
		icsk->icsk_ext_hdr_len += opt->opt_flen +
					  opt->opt_nflen;

	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

	inet->inet_dport = usin->sin6_port;

	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet6_hash_connect(tcp_death_row, sk);
	if (err)
		goto late_failure;

	sk_set_txhash(sk);

	if (likely(!tp->repair)) {
		if (!tp->write_seq)
			WRITE_ONCE(tp->write_seq,
				   secure_tcpv6_seq(np->saddr.s6_addr32,
						    sk->sk_v6_daddr.s6_addr32,
						    inet->inet_sport,
						    inet->inet_dport));
		tp->tsoffset = secure_tcpv6_ts_off(net, np->saddr.s6_addr32,
						   sk->sk_v6_daddr.s6_addr32);
	}

	/* TCP Fast Open may defer the actual SYN until sendmsg(). */
	if (tcp_fastopen_defer_connect(sk, &err))
		return err;
	if (err)
		goto late_failure;

	err = tcp_connect(sk);
	if (err)
		goto late_failure;

	return 0;

late_failure:
	tcp_set_state(sk, TCP_CLOSE);
	inet_bhash2_reset_saddr(sk);
failure:
	inet->inet_dport = 0;
	sk->sk_route_caps = 0;
	return err;
}

/* Deferred PMTU handler: shrink the MSS to the new path MTU stored in
 * tp->mtu_info and retransmit.  Runs either directly from tcp_v6_err()
 * or later from the TSQ tasklet if the socket was owned by user.
 */
static void tcp_v6_mtu_reduced(struct sock *sk)
{
	struct dst_entry *dst;
	u32 mtu, dmtu;

	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
		return;

	mtu = READ_ONCE(tcp_sk(sk)->mtu_info);

	/* Drop requests trying to increase our current mss.
	 * Check done in __ip6_rt_update_pmtu() is too late.
	 */
	if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
		return;

	dst = inet6_csk_update_pmtu(sk, mtu);
	if (!dst)
		return;

	dmtu = dst6_mtu(dst);
	if (inet_csk(sk)->icsk_pmtu_cookie > dmtu) {
		tcp_sync_mss(sk, dmtu);
		tcp_simple_retransmit(sk);
	}
}

/* ICMPv6 error handler for TCP: look up the socket the quoted segment
 * belongs to and react (PMTU update, redirect, connection abort, or
 * RTO revert per RFC 6069) depending on socket state and error type.
 */
static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		      u8 type, u8 code, int offset, __be32 info)
{
	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
	struct net *net = dev_net_rcu(skb->dev);
	struct request_sock *fastopen;
	struct ipv6_pinfo *np;
	struct tcp_sock *tp;
	__u32 seq, snd_una;
	struct sock *sk;
	bool fatal;
	int err;

	sk = __inet6_lookup_established(net, &hdr->daddr, th->dest,
					&hdr->saddr, ntohs(th->source),
					skb->dev->ifindex, inet6_sdif(skb));

	if (!sk) {
		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
				  ICMP6_MIB_INERRORS);
		return -ENOENT;
	}

	if (sk->sk_state == TCP_TIME_WAIT) {
		/* To increase the counter of ignored icmps for TCP-AO */
		tcp_ao_ignore_icmp(sk, AF_INET6, type, code);
		inet_twsk_put(inet_twsk(sk));
		return 0;
	}
	seq = ntohl(th->seq);
	fatal = icmpv6_err_convert(type, code, &err);
	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		tcp_req_err(sk, seq, fatal);
		return 0;
	}

	if (tcp_ao_ignore_icmp(sk, AF_INET6, type, code)) {
		sock_put(sk);
		return 0;
	}

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (static_branch_unlikely(&ip6_min_hopcount)) {
		/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
		if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
			goto out;
		}
	}

	tp = tcp_sk(sk);
	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
	fastopen = rcu_dereference(tp->fastopen_rsk);
	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, snd_una, tp->snd_nxt)) {
		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = tcp_inet6_sk(sk);

	if (type == NDISC_REDIRECT) {
		if (!sock_owned_by_user(sk)) {
			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);

			if (dst)
				dst->ops->redirect(dst, sk, skb);
		}
		goto out;
	}

	if (type == ICMPV6_PKT_TOOBIG) {
		u32 mtu = ntohl(info);

		/* We are not interested in TCP_LISTEN and open_requests
		 * (SYN-ACKs send out by Linux are always <576bytes so
		 * they should go through unfragmented).
		 */
		if (sk->sk_state == TCP_LISTEN)
			goto out;

		if (!ip6_sk_accept_pmtu(sk))
			goto out;

		if (mtu < IPV6_MIN_MTU)
			goto out;

		WRITE_ONCE(tp->mtu_info, mtu);

		/* If the socket is owned by user, defer the MSS update to
		 * the TSQ tasklet via TCP_MTU_REDUCED_DEFERRED.
		 */
		if (!sock_owned_by_user(sk))
			tcp_v6_mtu_reduced(sk);
		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
					   &sk->sk_tsq_flags))
			sock_hold(sk);
		goto out;
	}


	/* Might be for an request_sock */
	switch (sk->sk_state) {
	case TCP_SYN_SENT:
	case TCP_SYN_RECV:
		/* Only in fast or simultaneous open. If a fast open socket is
		 * already accepted it is treated as a connected one below.
		 */
		if (fastopen && !fastopen->sk)
			break;

		ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);

		if (!sock_owned_by_user(sk))
			tcp_done_with_error(sk, err);
		else
			WRITE_ONCE(sk->sk_err_soft, err);
		goto out;
	case TCP_LISTEN:
		break;
	default:
		/* check if this ICMP message allows revert of backoff.
		 * (see RFC 6069)
		 */
		if (!fastopen && type == ICMPV6_DEST_UNREACH &&
		    code == ICMPV6_NOROUTE)
			tcp_ld_RTO_revert(sk, seq);
	}

	if (!sock_owned_by_user(sk) && inet6_test_bit(RECVERR6, sk)) {
		WRITE_ONCE(sk->sk_err, err);
		sk_error_report(sk);
	} else {
		WRITE_ONCE(sk->sk_err_soft, err);
	}
out:
	bh_unlock_sock(sk);
	sock_put(sk);
	return 0;
}


/* Build and transmit a SYN-ACK for the given request socket.
 * Looks up a route if the caller did not supply one, and reflects the
 * incoming TOS/flowlabel when the relevant sysctls/sockopts ask for it.
 * Returns a net_xmit_eval()'ed error code, or -ENOMEM.
 */
static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
			      struct flowi *fl,
			      struct request_sock *req,
			      struct tcp_fastopen_cookie *foc,
			      enum tcp_synack_type synack_type,
			      struct sk_buff *syn_skb)
{
	struct inet_request_sock *ireq = inet_rsk(req);
	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct ipv6_txoptions *opt;
	struct flowi6 *fl6 = &fl->u.ip6;
	struct sk_buff *skb;
	int err = -ENOMEM;
	u8 tclass;

	/* First, grab a route. */
	if (!dst && (dst = inet6_csk_route_req(sk, NULL, fl6, req,
					       IPPROTO_TCP)) == NULL)
		goto done;

	skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);

	if (skb) {
		tcp_rsk(req)->syn_ect_snt = np->tclass & INET_ECN_MASK;
		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
				    &ireq->ir_v6_rmt_addr);

		fl6->daddr = ireq->ir_v6_rmt_addr;
		if (inet6_test_bit(REPFLOW, sk) && ireq->pktopts)
			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));

		tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
				(tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
				(np->tclass & INET_ECN_MASK) :
				np->tclass;

		if (!INET_ECN_is_capable(tclass) &&
		    tcp_bpf_ca_needs_ecn((struct sock *)req))
			tclass |= INET_ECN_ECT_0;

		rcu_read_lock();
		opt = ireq->ipv6_opt;
		if (!opt)
			opt = rcu_dereference(np->opt);
		err = ip6_xmit(sk, skb, fl6, skb->mark ? : READ_ONCE(sk->sk_mark),
			       opt, tclass, READ_ONCE(sk->sk_priority));
		rcu_read_unlock();
		err = net_xmit_eval(err);
	}

done:
	return err;
}


/* Release per-request resources (saved IPv6 options and pktopts skb). */
static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
	kfree(inet_rsk(req)->ipv6_opt);
	consume_skb(inet_rsk(req)->pktopts);
}

#ifdef CONFIG_TCP_MD5SIG
/* Find the MD5 key configured for the given peer address/L3 domain. */
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
						   const struct in6_addr *addr,
						   int l3index)
{
	return tcp_md5_do_lookup(sk, l3index,
				 (union tcp_md5_addr *)addr, AF_INET6);
}

/* MD5 key lookup keyed on another socket's destination address. */
static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
						const struct sock *addr_sk)
{
	int l3index;

	l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
						 addr_sk->sk_bound_dev_if);
	return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
				    l3index);
}

/* setsockopt(TCP_MD5SIG / TCP_MD5SIG_EXT) handler: add or delete an MD5
 * key for a peer.  v4-mapped peers are stored as AF_INET keys.  A key is
 * rejected if a matching TCP-AO key already exists for the same peer.
 */
static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
				 sockptr_t optval, int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
	union tcp_ao_addr *addr;
	int l3index = 0;
	u8 prefixlen;
	bool l3flag;
	u8 flags;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin6->sin6_family != AF_INET6)
		return -EINVAL;

	flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;
	l3flag = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;

	if (optname == TCP_MD5SIG_EXT &&
	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
		prefixlen = cmd.tcpm_prefixlen;
		if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
					prefixlen > 32))
			return -EINVAL;
	} else {
		prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
	}

	if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex &&
	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
		struct net_device *dev;

		rcu_read_lock();
		dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
		if (dev && netif_is_l3_master(dev))
			l3index = dev->ifindex;
		rcu_read_unlock();

		/* ok to reference set/not set outside of rcu;
		 * right now device MUST be an L3 master
		 */
		if (!dev || !l3index)
			return -EINVAL;
	}

	/* Zero key length means "delete the key for this peer". */
	if (!cmd.tcpm_keylen) {
		if (ipv6_addr_v4mapped(&sin6->sin6_addr))
			return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
					      AF_INET, prefixlen,
					      l3index, flags);
		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
				      AF_INET6, prefixlen, l3index, flags);
	}

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	if (ipv6_addr_v4mapped(&sin6->sin6_addr)) {
		addr = (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3];

		/* Don't allow keys for peers that have a matching TCP-AO key.
		 * See the comment in tcp_ao_add_cmd()
		 */
		if (tcp_ao_required(sk, addr, AF_INET,
				    l3flag ? l3index : -1, false))
			return -EKEYREJECTED;
		return tcp_md5_do_add(sk, addr,
				      AF_INET, prefixlen, l3index, flags,
				      cmd.tcpm_key, cmd.tcpm_keylen);
	}

	addr = (union tcp_md5_addr *)&sin6->sin6_addr;

	/* Don't allow keys for peers that have a matching TCP-AO key.
	 * See the comment in tcp_ao_add_cmd()
	 */
	if (tcp_ao_required(sk, addr, AF_INET6, l3flag ? l3index : -1, false))
		return -EKEYREJECTED;

	return tcp_md5_do_add(sk, addr, AF_INET6, prefixlen, l3index, flags,
			      cmd.tcpm_key, cmd.tcpm_keylen);
}

/* Feed the TCP pseudo-header plus the TCP header (with checksum zeroed)
 * into the MD5 context, as required by RFC 2385.
 */
static void tcp_v6_md5_hash_headers(struct md5_ctx *ctx,
				    const struct in6_addr *daddr,
				    const struct in6_addr *saddr,
				    const struct tcphdr *th, int nbytes)
{
	struct {
		struct tcp6_pseudohdr ip; /* TCP pseudo-header (RFC2460) */
		struct tcphdr tcp;
	} h;

	h.ip.saddr = *saddr;
	h.ip.daddr = *daddr;
	h.ip.protocol = cpu_to_be32(IPPROTO_TCP);
	h.ip.len = cpu_to_be32(nbytes);
	h.tcp = *th;
	h.tcp.check = 0;
	md5_update(ctx, (const u8 *)&h, sizeof(h.ip) + sizeof(h.tcp));
}

/* MD5 signature over headers only (used for header-built replies). */
static noinline_for_stack void
tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
		    const struct in6_addr *daddr, struct in6_addr *saddr,
		    const struct tcphdr *th)
{
	struct md5_ctx ctx;

	md5_init(&ctx);
	tcp_v6_md5_hash_headers(&ctx, daddr, saddr, th, th->doff << 2);
	tcp_md5_hash_key(&ctx, key);
	md5_final(&ctx, md5_hash);
}

/* MD5 signature over pseudo-header, TCP header and payload of skb.
 * Addresses come from the socket when available, else from the packet.
 */
static noinline_for_stack void
tcp_v6_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
		    const struct sock *sk, const struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	const struct in6_addr *saddr, *daddr;
	struct md5_ctx ctx;

	if (sk) { /* valid for establish/request sockets */
		saddr = &sk->sk_v6_rcv_saddr;
		daddr = &sk->sk_v6_daddr;
	} else {
		const struct ipv6hdr *ip6h = ipv6_hdr(skb);
		saddr = &ip6h->saddr;
		daddr = &ip6h->daddr;
	}

	md5_init(&ctx);
	tcp_v6_md5_hash_headers(&ctx, daddr, saddr, th, skb->len);
	tcp_md5_hash_skb_data(&ctx, skb, th->doff << 2);
	tcp_md5_hash_key(&ctx, key);
	md5_final(&ctx, md5_hash);
}
#endif

/* Fill the IPv6 parts of a freshly minted request sock from the SYN,
 * and stash the SYN skb when the listener wants packet options.
 */
static void tcp_v6_init_req(struct request_sock *req,
			    const struct sock *sk_listener,
			    struct sk_buff *skb,
			    u32 tw_isn)
{
	bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
	struct inet_request_sock *ireq = inet_rsk(req);
	const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);

	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
	ireq->ir_rmt_addr = LOOPBACK4_IPV6;
	ireq->ir_loc_addr = LOOPBACK4_IPV6;

	/* So that link locals have meaning */
	if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
		ireq->ir_iif = tcp_v6_iif(skb);

	if (!tw_isn &&
	    (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
	     np->rxopt.bits.rxinfo ||
	     np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
	     np->rxopt.bits.rxohlim || inet6_test_bit(REPFLOW, sk_listener))) {
		refcount_inc(&skb->users);
		ireq->pktopts = skb;
	}
}

/* init_req + LSM check + route lookup for the request sock. */
static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
					  struct sk_buff *skb,
					  struct flowi *fl,
					  struct request_sock *req,
					  u32 tw_isn)
{
	tcp_v6_init_req(req, sk, skb, tw_isn);

	if (security_inet_conn_request(sk, skb, req))
		return NULL;

	return inet6_csk_route_req(sk, NULL, &fl->u.ip6, req, IPPROTO_TCP);
}

struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
	.family		=	AF_INET6,
	.obj_size	=	sizeof(struct tcp6_request_sock),
	.send_ack	=	tcp_v6_reqsk_send_ack,
	.destructor	=	tcp_v6_reqsk_destructor,
	.send_reset	=	tcp_v6_send_reset,
};

const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
	.mss_clamp	=	IPV6_MIN_MTU - sizeof(struct tcphdr) -
				sizeof(struct ipv6hdr),
#ifdef CONFIG_TCP_MD5SIG
	.req_md5_lookup	=	tcp_v6_md5_lookup,
	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
#endif
#ifdef CONFIG_TCP_AO
	.ao_lookup	=	tcp_v6_ao_lookup_rsk,
	.ao_calc_key	=	tcp_v6_ao_calc_key_rsk,
	.ao_synack_hash	=	tcp_v6_ao_synack_hash,
#endif
#ifdef CONFIG_SYN_COOKIES
	.cookie_init_seq =	cookie_v6_init_sequence,
#endif
	.route_req	=	tcp_v6_route_req,
	.init_seq	=	tcp_v6_init_seq,
	.init_ts_off	=	tcp_v6_init_ts_off,
	.send_synack	=	tcp_v6_send_synack,
};

/* Build and send a bare control segment (RST if @rst, else ACK) in
 * reply to @skb, mirroring its addresses.  Optionally carries a
 * timestamp option, an MPTCP reset option and an MD5 or TCP-AO
 * signature, as selected by @key.  Transmitted via the per-netns
 * control socket.
 */
static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
				 u32 ack, u32 win, u32 tsval, u32 tsecr,
				 int oif, int rst, u8 tclass, __be32 label,
				 u32 priority, u32 txhash, struct tcp_key *key)
{
	struct net *net = sk ? sock_net(sk) : skb_dst_dev_net_rcu(skb);
	unsigned int tot_len = sizeof(struct tcphdr);
	struct sock *ctl_sk = net->ipv6.tcp_sk;
	const struct tcphdr *th = tcp_hdr(skb);
	__be32 mrst = 0, *topt;
	struct dst_entry *dst;
	struct sk_buff *buff;
	struct tcphdr *t1;
	struct flowi6 fl6;
	u32 mark = 0;

	if (tsecr)
		tot_len += TCPOLEN_TSTAMP_ALIGNED;
	if (tcp_key_is_md5(key))
		tot_len += TCPOLEN_MD5SIG_ALIGNED;
	if (tcp_key_is_ao(key))
		tot_len += tcp_ao_len_aligned(key->ao_key);

#ifdef CONFIG_MPTCP
	if (rst && !tcp_key_is_md5(key)) {
		mrst = mptcp_reset_option(skb);

		if (mrst)
			tot_len += sizeof(__be32);
	}
#endif

	buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
	if (!buff)
		return;

	skb_reserve(buff, MAX_TCP_HEADER);

	t1 = skb_push(buff, tot_len);
	skb_reset_transport_header(buff);

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = tot_len / 4;
	t1->seq = htonl(seq);
	t1->ack_seq = htonl(ack);
	t1->ack = !rst || !th->ack;
	t1->rst = rst;
	t1->window = htons(win);

	topt = (__be32 *)(t1 + 1);

	if (tsecr) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
		*topt++ = htonl(tsval);
		*topt++ = htonl(tsecr);
	}

	if (mrst)
		*topt++ = mrst;

#ifdef CONFIG_TCP_MD5SIG
	if (tcp_key_is_md5(key)) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
		tcp_v6_md5_hash_hdr((__u8 *)topt, key->md5_key,
				    &ipv6_hdr(skb)->saddr,
				    &ipv6_hdr(skb)->daddr, t1);
	}
#endif
#ifdef CONFIG_TCP_AO
	if (tcp_key_is_ao(key)) {
		*topt++ = htonl((TCPOPT_AO << 24) |
				(tcp_ao_len(key->ao_key) << 16) |
				(key->ao_key->sndid << 8) |
				(key->rcv_next));

		tcp_ao_hash_hdr(AF_INET6, (char *)topt, key->ao_key,
				key->traffic_key,
				(union tcp_ao_addr *)&ipv6_hdr(skb)->saddr,
				(union tcp_ao_addr *)&ipv6_hdr(skb)->daddr,
				t1, key->sne);
	}
#endif

	/* Reply goes back where the offending segment came from. */
	memset(&fl6, 0, sizeof(fl6));
	fl6.daddr = ipv6_hdr(skb)->saddr;
	fl6.saddr = ipv6_hdr(skb)->daddr;
	fl6.flowlabel = label;

	buff->ip_summed = CHECKSUM_PARTIAL;

	__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);

	fl6.flowi6_proto = IPPROTO_TCP;
	if (rt6_need_strict(&fl6.daddr) && !oif)
		fl6.flowi6_oif = tcp_v6_iif(skb);
	else {
		if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
			oif = skb->skb_iif;

		fl6.flowi6_oif = oif;
	}

	if (sk) {
		/* unconstify the socket only to attach it to buff with care. */
		skb_set_owner_edemux(buff, (struct sock *)sk);
		psp_reply_set_decrypted(sk, buff);

		if (sk->sk_state == TCP_TIME_WAIT)
			mark = inet_twsk(sk)->tw_mark;
		else
			mark = READ_ONCE(sk->sk_mark);
		skb_set_delivery_time(buff, tcp_transmit_time(sk), SKB_CLOCK_MONOTONIC);
	}
	if (txhash) {
		/* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */
		skb_set_hash(buff, txhash, PKT_HASH_TYPE_L4);
	}
	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
	fl6.fl6_dport = t1->dest;
	fl6.fl6_sport = t1->source;
	fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));

	/* Pass a socket to ip6_dst_lookup either it is for RST
	 * Underlying function will use this to retrieve the network
	 * namespace
	 */
	if (sk && sk->sk_state != TCP_TIME_WAIT)
		dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /*sk's xfrm_policy can be referred*/
	else
		dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL);
	if (!IS_ERR(dst)) {
		skb_dst_set(buff, dst);
		ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
			 tclass, priority);
		TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
		if (rst)
			TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
		return;
	}

	kfree_skb(buff);
}

/* Send a RST in reply to @skb.  Never resets a RST.  When the incoming
 * segment carries an MD5 or TCP-AO signature, the reply is only sent if
 * a matching key is found (and, for MD5 with no socket, if the incoming
 * hash verifies), so forged segments cannot elicit a reset.
 */
static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb,
			      enum sk_rst_reason reason)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
	const __u8 *md5_hash_location = NULL;
#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
	bool allocated_traffic_key = false;
#endif
	const struct tcp_ao_hdr *aoh;
	struct tcp_key key = {};
	u32 seq = 0, ack_seq = 0;
	__be32 label = 0;
	u32 priority = 0;
	struct net *net;
	u32 txhash = 0;
	int oif = 0;
#ifdef CONFIG_TCP_MD5SIG
	unsigned char newhash[16];
	struct sock *sk1 = NULL;
#endif

	if (th->rst)
		return;

	/* If sk not NULL, it means we did a successful lookup and incoming
	 * route had to be correct. prequeue might have dropped our dst.
	 */
	if (!sk && !ipv6_unicast_destination(skb))
		return;

	net = sk ? sock_net(sk) : skb_dst_dev_net_rcu(skb);
	/* Invalid TCP option size or twice included auth */
	if (tcp_parse_auth_options(th, &md5_hash_location, &aoh))
		return;
#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
	rcu_read_lock();
#endif
#ifdef CONFIG_TCP_MD5SIG
	if (sk && sk_fullsock(sk)) {
		int l3index;

		/* sdif set, means packet ingressed via a device
		 * in an L3 domain and inet_iif is set to it.
		 */
		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
		key.md5_key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
		if (key.md5_key)
			key.type = TCP_KEY_MD5;
	} else if (md5_hash_location) {
		int dif = tcp_v6_iif_l3_slave(skb);
		int sdif = tcp_v6_sdif(skb);
		int l3index;

		/*
		 * active side is lost. Try to find listening socket through
		 * source port, and then find md5 key through listening socket.
		 * we are not loose security here:
		 * Incoming packet is checked with md5 hash with finding key,
		 * no RST generated if md5 hash doesn't match.
		 */
		sk1 = inet6_lookup_listener(net, NULL, 0, &ipv6h->saddr, th->source,
					    &ipv6h->daddr, ntohs(th->source),
					    dif, sdif);
		if (!sk1)
			goto out;

		/* sdif set, means packet ingressed via a device
		 * in an L3 domain and dif is set to it.
		 */
		l3index = tcp_v6_sdif(skb) ? dif : 0;

		key.md5_key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
		if (!key.md5_key)
			goto out;
		key.type = TCP_KEY_MD5;

		tcp_v6_md5_hash_skb(newhash, key.md5_key, NULL, skb);
		if (memcmp(md5_hash_location, newhash, 16) != 0)
			goto out;
	}
#endif

	if (th->ack)
		seq = ntohl(th->ack_seq);
	else
		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
			  (th->doff << 2);

#ifdef CONFIG_TCP_AO
	if (aoh) {
		int l3index;

		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
		if (tcp_ao_prepare_reset(sk, skb, aoh, l3index, seq,
					 &key.ao_key, &key.traffic_key,
					 &allocated_traffic_key,
					 &key.rcv_next, &key.sne))
			goto out;
		key.type = TCP_KEY_AO;
	}
#endif

	if (sk) {
		oif = sk->sk_bound_dev_if;
		if (sk_fullsock(sk)) {
			if (inet6_test_bit(REPFLOW, sk))
				label = ip6_flowlabel(ipv6h);
			priority = READ_ONCE(sk->sk_priority);
			txhash = sk->sk_txhash;
		}
		if (sk->sk_state == TCP_TIME_WAIT) {
			label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
			priority = inet_twsk(sk)->tw_priority;
			txhash = inet_twsk(sk)->tw_txhash;
		}
	} else {
		if (READ_ONCE(net->ipv6.sysctl.flowlabel_reflect) &
		    FLOWLABEL_REFLECT_TCP_RESET)
			label = ip6_flowlabel(ipv6h);
	}

	trace_tcp_send_reset(sk, skb, reason);

	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, 1,
			     ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK,
			     label, priority, txhash,
			     &key);

#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
out:
	if (allocated_traffic_key)
		kfree(key.traffic_key);
	rcu_read_unlock();
#endif
}

/* Thin wrapper: send a (possibly signed) pure ACK via
 * tcp_v6_send_response() with rst == 0.
 */
static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
			    u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
			    struct tcp_key *key, u8 tclass,
			    __be32 label, u32 priority, u32 txhash)
{
	tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, 0,
			     tclass, label, priority, txhash, key);
}

/* ACK a segment received for a TIME-WAIT socket, picking up the
 * TCP-AO or MD5 key saved in the timewait sock.  Consumes the
 * timewait reference.
 */
static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb,
				enum tcp_tw_status tw_status)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
	u8 tclass = tw->tw_tclass;
	struct tcp_key key = {};

	if (tw_status == TCP_TW_ACK_OOW)
		tclass &= ~INET_ECN_MASK;
#ifdef CONFIG_TCP_AO
	struct tcp_ao_info *ao_info;

	if (static_branch_unlikely(&tcp_ao_needed.key)) {

		/* FIXME: the segment to-be-acked is not verified yet */
		ao_info = rcu_dereference(tcptw->ao_info);
		if (ao_info) {
			const struct tcp_ao_hdr *aoh;

			/* Invalid TCP option size or twice included auth */
			if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh))
				goto out;
			if (aoh)
				key.ao_key = tcp_ao_established_key(sk, ao_info,
								    aoh->rnext_keyid, -1);
		}
	}
	if (key.ao_key) {
		struct tcp_ao_key *rnext_key;

		key.traffic_key = snd_other_key(key.ao_key);
		/* rcv_next switches to our rcv_next */
		rnext_key = READ_ONCE(ao_info->rnext_key);
		key.rcv_next = rnext_key->rcvid;
		key.sne = READ_ONCE(ao_info->snd_sne);
		key.type = TCP_KEY_AO;
#else
	if (0) {
#endif
#ifdef CONFIG_TCP_MD5SIG
	} else if (static_branch_unlikely(&tcp_md5_needed.key)) {
		key.md5_key = tcp_twsk_md5_key(tcptw);
		if (key.md5_key)
			key.type = TCP_KEY_MD5;
#endif
	}

	tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt,
			READ_ONCE(tcptw->tw_rcv_nxt),
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcp_tw_tsval(tcptw),
			READ_ONCE(tcptw->tw_ts_recent), tw->tw_bound_dev_if,
			&key, tclass, cpu_to_be32(tw->tw_flowlabel),
			tw->tw_priority, tw->tw_txhash);

#ifdef CONFIG_TCP_AO
out:
#endif
	inet_twsk_put(tw);
}

static void
tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
		      struct request_sock *req)
{
	struct tcp_key key = {};

#ifdef CONFIG_TCP_AO
	if (static_branch_unlikely(&tcp_ao_needed.key) &&
	    tcp_rsk_used_ao(req)) {
		const struct in6_addr *addr = &ipv6_hdr(skb)->saddr;
		const struct tcp_ao_hdr *aoh;
		int l3index;

		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
		/* Invalid TCP option size or twice included auth */
		if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh))
			return;
		if (!aoh)
			return;
		key.ao_key = tcp_ao_do_lookup(sk, l3index,
					      (union tcp_ao_addr *)addr,
					      AF_INET6, aoh->rnext_keyid, -1);
		if (unlikely(!key.ao_key)) {
			/* Send ACK with any matching MKT for the peer */
			key.ao_key = tcp_ao_do_lookup(sk, l3index,
						      (union tcp_ao_addr *)addr,
						      AF_INET6, -1, -1);
			/* Matching key disappeared (user removed the key?)
			 * let the handshake timeout.
			 */
			if (!key.ao_key) {
				net_info_ratelimited("TCP-AO key for (%pI6, %d)->(%pI6, %d) suddenly disappeared, won't ACK new connection\n",
						     addr,
						     ntohs(tcp_hdr(skb)->source),
						     &ipv6_hdr(skb)->daddr,
						     ntohs(tcp_hdr(skb)->dest));
				return;
			}
		}
		key.traffic_key = kmalloc(tcp_ao_digest_size(key.ao_key), GFP_ATOMIC);
		if (!key.traffic_key)
			return;

		key.type = TCP_KEY_AO;
		key.rcv_next = aoh->keyid;
		tcp_v6_ao_calc_key_rsk(key.ao_key, key.traffic_key, req);
#else
	if (0) {
#endif
#ifdef CONFIG_TCP_MD5SIG
	} else if (static_branch_unlikely(&tcp_md5_needed.key)) {
		int l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;

		key.md5_key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr,
						   l3index);
		if (key.md5_key)
			key.type = TCP_KEY_MD5;
#endif
	}

	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
	 */
	tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
			tcp_rsk(req)->rcv_nxt,
			tcp_synack_window(req) >> inet_rsk(req)->rcv_wscale,
			tcp_rsk_tsval(tcp_rsk(req)),
			req->ts_recent, sk->sk_bound_dev_if,
			&key, ipv6_get_dsfield(ipv6_hdr(skb)) & ~INET_ECN_MASK,
			0,
			READ_ONCE(sk->sk_priority),
			READ_ONCE(tcp_rsk(req)->txhash));
	if (tcp_key_is_ao(&key))
		kfree(key.traffic_key);
}

/* On a listener, a non-SYN segment may be a SYN-cookie ACK: validate the
 * cookie and return the resulting child socket, or the listener unchanged.
 */
static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
	const struct tcphdr *th = tcp_hdr(skb);

	if (!th->syn)
		sk = cookie_v6_check(sk, skb);
#endif
	return sk;
}

/* Compute a SYN-cookie ISN for the given SYN; returns the MSS to advertise
 * (0 when cookies are unavailable/disabled).
 */
u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
			 struct tcphdr *th, u32 *cookie)
{
	u16 mss = 0;
#ifdef CONFIG_SYN_COOKIES
	mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
				    &tcp_request_sock_ipv6_ops, sk, th);
	if (mss) {
		*cookie = __cookie_v6_init_sequence(iph, th, &mss);
		tcp_synq_overflow(sk);
	}
#endif
	return mss;
}

/* Handle an incoming connection request (SYN); v4-mapped traffic is handed
 * to the IPv4 path, non-unicast and v4-mapped-source SYNs are dropped.
 */
static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))
		goto drop;

	if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
		__IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
		return 0;
	}

	return tcp_conn_request(&tcp6_request_sock_ops,
				&tcp_request_sock_ipv6_ops, sk, skb);

drop:
	tcp_listendrop(sk);
	return 0; /* don't send reset */
}

static void tcp_v6_restore_cb(struct sk_buff *skb)
{
	/* We need to move header back to the beginning if xfrm6_policy_check()
	 * and tcp_v6_fill_cb() are going to be called again.
	 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
	 */
	memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
		sizeof(struct inet6_skb_parm));
}

/* Called from tcp_v4_syn_recv_sock() for v6_mapped children. */
static void tcp_v6_mapped_child_init(struct sock *newsk, const struct sock *sk)
{
	struct inet_sock *newinet = inet_sk(newsk);
	struct ipv6_pinfo *newnp;

	newinet->pinet6 = newnp = tcp_inet6_sk(newsk);
	newinet->ipv6_fl_list = NULL;

	memcpy(newnp, tcp_inet6_sk(sk), sizeof(struct ipv6_pinfo));

	newnp->saddr = newsk->sk_v6_rcv_saddr;

	/* The child speaks IPv4 on the wire: route it through the mapped ops. */
	inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
	if (sk_is_mptcp(newsk))
		mptcpv6_handle_mapped(newsk, true);
	newsk->sk_backlog_rcv = tcp_v4_do_rcv;
#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
	tcp_sk(newsk)->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

	newnp->ipv6_mc_list = NULL;
	newnp->ipv6_ac_list = NULL;
	newnp->pktoptions = NULL;
	newnp->opt = NULL;

	/* tcp_v4_syn_recv_sock() has initialized newinet->mc_{index,ttl} */
	newnp->mcast_oif = newinet->mc_index;
	newnp->mcast_hops = newinet->mc_ttl;

	newnp->rcv_flowinfo = 0;
	if (inet6_test_bit(REPFLOW, sk))
		newnp->flow_label = 0;
}

/* Create the child socket for a completed handshake on listener @sk.
 * NOTE(review): @opt_child_init is not invoked in the IPv6 path visible
 * here; the v4-mapped branch instead passes tcp_v6_mapped_child_init down
 * to tcp_v4_syn_recv_sock() — confirm intended use against that callee.
 */
static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
					 struct request_sock *req,
					 struct dst_entry *dst,
					 struct request_sock *req_unhash,
					 bool *own_req,
					 void (*opt_child_init)(struct sock *newsk,
								const struct sock *sk))
{
	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct inet_request_sock *ireq;
	struct ipv6_txoptions *opt;
	struct inet_sock *newinet;
	bool found_dup_sk = false;
	struct ipv6_pinfo *newnp;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
	int l3index;
#endif
	struct flowi6 fl6;

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_syn_recv_sock(sk, skb, req, dst,
					    req_unhash, own_req,
					    tcp_v6_mapped_child_init);
	ireq = inet_rsk(req);

	if (sk_acceptq_is_full(sk))
		goto exit_overflow;

	dst = inet6_csk_route_req(sk, dst, &fl6, req, IPPROTO_TCP);
	if (!dst)
		goto exit;

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto exit_nonewsk;

	/*
	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
	 * count here, tcp_create_openreq_child now does this for us, see the
	 * comment in that function for the gory details. -acme
	 */

	newsk->sk_gso_type = SKB_GSO_TCPV6;
	inet6_sk_rx_dst_set(newsk, skb);

	newinet = inet_sk(newsk);
	newinet->cork.fl.u.ip6 = fl6;
	newinet->pinet6 = tcp_inet6_sk(newsk);
	newinet->ipv6_fl_list = NULL;
	newinet->inet_opt = NULL;

	newtp = tcp_sk(newsk);
	newnp = tcp_inet6_sk(newsk);

	memcpy(newnp, np, sizeof(struct ipv6_pinfo));

	ip6_dst_store(newsk, dst, false, false);

	newnp->saddr = ireq->ir_v6_loc_addr;

	/* Now IPv6 options...

	   First: no IPv4 options.
	 */
	newnp->ipv6_mc_list = NULL;
	newnp->ipv6_ac_list = NULL;

	/* Clone RX bits */
	newnp->rxopt.all = np->rxopt.all;

	newnp->pktoptions = NULL;
	newnp->opt = NULL;
	newnp->mcast_oif = tcp_v6_iif(skb);
	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
	newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
	if (inet6_test_bit(REPFLOW, sk))
		newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));

	/* Set ToS of the new socket based upon the value of incoming SYN.
	 * ECT bits are set later in tcp_init_transfer().
	 */
	if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
		newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;

	/* Clone native IPv6 options from listening socket (if any)

	   Yes, keeping reference count would be much more clever,
	   but we make one more one thing there: reattach optmem
	   to newsk.
	 */
	opt = ireq->ipv6_opt;
	if (!opt)
		opt = rcu_dereference(np->opt);
	if (opt) {
		opt = ipv6_dup_options(newsk, opt);
		RCU_INIT_POINTER(newnp->opt, opt);
	}
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (opt)
		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
						    opt->opt_flen;

	tcp_ca_openreq_child(newsk, dst);

	tcp_sync_mss(newsk, dst6_mtu(dst));
	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));

	tcp_initialize_rcv_mss(newsk);

#ifdef CONFIG_TCP_MD5SIG
	l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);

	if (!tcp_rsk_used_ao(req)) {
		/* Copy over the MD5 key from the original socket */
		key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
		if (key) {
			const union tcp_md5_addr *addr;

			addr = (union tcp_md5_addr *)&newsk->sk_v6_daddr;
			if (tcp_md5_key_copy(newsk, addr, AF_INET6, 128, l3index, key))
				goto put_and_exit;
		}
	}
#endif
#ifdef CONFIG_TCP_AO
	/* Copy over tcp_ao_info if any */
	if (tcp_ao_copy_all_matching(sk, newsk, req, skb, AF_INET6))
		goto put_and_exit; /* OOM */
#endif

	if (__inet_inherit_port(sk, newsk) < 0)
		goto put_and_exit;
	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
				       &found_dup_sk);
	if (*own_req) {
		tcp_move_syn(newtp, req);

		/* Clone pktoptions received with SYN, if we own the req */
		if (ireq->pktopts) {
			newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk);
			consume_skb(ireq->pktopts);
			ireq->pktopts = NULL;
			if (newnp->pktoptions)
				tcp_v6_restore_cb(newnp->pktoptions);
		}
	} else {
		if (!req_unhash && found_dup_sk) {
			/* This code path should only be executed in the
			 * syncookie case only
			 */
			bh_unlock_sock(newsk);
			sock_put(newsk);
			newsk = NULL;
		}
	}

	return newsk;

exit_overflow:
	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
exit_nonewsk:
	dst_release(dst);
exit:
	tcp_listendrop(sk);
	return NULL;
put_and_exit:
	inet_csk_prepare_forced_close(newsk);
	tcp_done(newsk);
	goto exit;
}

INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
							   u32));
/* The socket must have it's spinlock held when we get
 * here, unless it is a TCP_LISTEN socket.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
INDIRECT_CALLABLE_SCOPE
int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct sk_buff *opt_skb = NULL;
	enum skb_drop_reason reason;
	struct tcp_sock *tp;

	/* Imagine: socket is IPv6. IPv4 packet arrives,
	   goes to IPv4 receive handler and backlogged.
	   From backlog it always goes here. Kerboom...
	   Fortunately, tcp_rcv_established and rcv_established
	   handle them correctly, but it is not case with
	   tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
	 */

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_do_rcv(sk, skb);

	/* PSP receive-policy gate: failing segments are dropped and counted
	 * as TCP input errors via err_discard.
	 */
	reason = psp_sk_rx_policy_check(sk, skb);
	if (reason)
		goto err_discard;

	/*
	 * socket locking is here for SMP purposes as backlog rcv
	 * is currently called with bh processing disabled.
	 */

	/* Do Stevens' IPV6_PKTOPTIONS.

	   Yes, guys, it is the only place in our code, where we
	   may make it not affecting IPv4.
	   The rest of code is protocol independent,
	   and I do not like idea to uglify IPv4.

	   Actually, all the idea behind IPV6_PKTOPTIONS
	   looks not very well thought. For now we latch
	   options, received in the last packet, enqueued
	   by tcp. Feel free to propose better solution.
					       --ANK (980728)
	 */
	if (np->rxopt.all && sk->sk_state != TCP_LISTEN)
		opt_skb = skb_clone_and_charge_r(skb, sk);

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		struct dst_entry *dst;

		dst = rcu_dereference_protected(sk->sk_rx_dst,
						lockdep_sock_is_held(sk));

		sock_rps_save_rxhash(sk, skb);
		sk_mark_napi_id(sk, skb);
		if (dst) {
			/* Invalidate the cached rx dst if the incoming device
			 * or the route cookie no longer matches.
			 */
			if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
			    INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
					    dst, sk->sk_rx_dst_cookie) == NULL) {
				RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
				dst_release(dst);
			}
		}

		tcp_rcv_established(sk, skb);
		if (opt_skb)
			goto ipv6_pktoptions;
		return 0;
	}

	if (tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v6_cookie_check(sk, skb);

		if (nsk != sk) {
			if (nsk) {
				reason = tcp_child_process(sk, nsk, skb);
				if (reason)
					goto reset;
			}
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb);

	reason = tcp_rcv_state_process(sk, skb);
	if (reason)
		goto reset;
	if (opt_skb)
		goto ipv6_pktoptions;
	return 0;

reset:
	tcp_v6_send_reset(sk, skb, sk_rst_convert_drop_reason(reason));
discard:
	if (opt_skb)
		__kfree_skb(opt_skb);
	sk_skb_reason_drop(sk, skb, reason);
	return 0;
csum_err:
	reason = SKB_DROP_REASON_TCP_CSUM;
	trace_tcp_bad_csum(skb);
	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
err_discard:
	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
	goto discard;


ipv6_pktoptions:
	/* Do you ask, what is it?

	   1. skb was enqueued by tcp.
	   2. skb is added to tail of read queue, rather than out of order.
	   3. socket is not in passive state.
	   4. Finally, it really contains options, which user wants to receive.
	 */
	tp = tcp_sk(sk);
	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
			WRITE_ONCE(np->mcast_oif, tcp_v6_iif(opt_skb));
		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
			WRITE_ONCE(np->mcast_hops,
				   ipv6_hdr(opt_skb)->hop_limit);
		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
		if (inet6_test_bit(REPFLOW, sk))
			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
		if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
			tcp_v6_restore_cb(opt_skb);
			opt_skb = xchg(&np->pktoptions, opt_skb);
		} else {
			__kfree_skb(opt_skb);
			opt_skb = xchg(&np->pktoptions, NULL);
		}
	}

	consume_skb(opt_skb);
	return 0;
}

static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
			   const struct tcphdr *th)
{
	/* This is tricky: we move IP6CB at its correct location into
	 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
	 * _decode_session6() uses IP6CB().
	 * barrier() makes sure compiler won't play aliasing games.
	 */
	memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
		sizeof(struct inet6_skb_parm));
	barrier();

	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff*4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->tcp_flags = tcp_flags_ntohs(th);
	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
	TCP_SKB_CB(skb)->sacked = 0;
	TCP_SKB_CB(skb)->has_rxtstamp =
			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
}

/* Main IPv6 TCP receive entry point: validate the header and checksum, look
 * up the owning socket, handle NEW_SYN_RECV request and TIME_WAIT
 * mini-sockets, then deliver the segment (directly or via the backlog).
 */
INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
{
	struct net *net = dev_net_rcu(skb->dev);
	enum skb_drop_reason drop_reason;
	enum tcp_tw_status tw_status;
	int sdif = inet6_sdif(skb);
	int dif = inet6_iif(skb);
	const struct tcphdr *th;
	const struct ipv6hdr *hdr;
	struct sock *sk = NULL;
	bool refcounted;
	int ret;
	u32 isn;

	drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/*
	 * Count it even if it's bad.
	 */
	__TCP_INC_STATS(net, TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = (const struct tcphdr *)skb->data;

	if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) {
		drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
		goto bad_packet;
	}
	if (!pskb_may_pull(skb, th->doff*4))
		goto discard_it;

	if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
		goto csum_error;

	/* Re-read header pointers: pskb_may_pull() may have moved skb->data. */
	th = (const struct tcphdr *)skb->data;
	hdr = ipv6_hdr(skb);

lookup:
	sk = __inet6_lookup_skb(skb, __tcp_hdrlen(th),
				th->source, th->dest, inet6_iif(skb), sdif,
				&refcounted);
	if (!sk)
		goto no_tcp_socket;

	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		struct request_sock *req = inet_reqsk(sk);
		bool req_stolen = false;
		struct sock *nsk;

		sk = req->rsk_listener;
		if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
			drop_reason = SKB_DROP_REASON_XFRM_POLICY;
		else
			drop_reason = tcp_inbound_hash(sk, req, skb,
						       &hdr->saddr, &hdr->daddr,
						       AF_INET6, dif, sdif);
		if (drop_reason) {
			sk_drops_skbadd(sk, skb);
			reqsk_put(req);
			goto discard_it;
		}
		if (tcp_checksum_complete(skb)) {
			reqsk_put(req);
			goto csum_error;
		}
		if (unlikely(sk->sk_state != TCP_LISTEN)) {
			nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
			if (!nsk) {
				inet_csk_reqsk_queue_drop_and_put(sk, req);
				goto lookup;
			}
			sk = nsk;
			/* reuseport_migrate_sock() has already held one sk_refcnt
			 * before returning.
			 */
		} else {
			sock_hold(sk);
		}
		refcounted = true;
		nsk = NULL;
		if (!tcp_filter(sk, skb, &drop_reason)) {
			th = (const struct tcphdr *)skb->data;
			hdr = ipv6_hdr(skb);
			tcp_v6_fill_cb(skb, hdr, th);
			nsk = tcp_check_req(sk, skb, req, false, &req_stolen,
					    &drop_reason);
		}
		if (!nsk) {
			reqsk_put(req);
			if (req_stolen) {
				/* Another cpu got exclusive access to req
				 * and created a full blown socket.
				 * Try to feed this packet to this socket
				 * instead of discarding it.
				 */
				tcp_v6_restore_cb(skb);
				sock_put(sk);
				goto lookup;
			}
			goto discard_and_relse;
		}
		nf_reset_ct(skb);
		if (nsk == sk) {
			reqsk_put(req);
			tcp_v6_restore_cb(skb);
		} else {
			drop_reason = tcp_child_process(sk, nsk, skb);
			if (drop_reason) {
				enum sk_rst_reason rst_reason;

				rst_reason = sk_rst_convert_drop_reason(drop_reason);
				tcp_v6_send_reset(nsk, skb, rst_reason);
				goto discard_and_relse;
			}
			sock_put(sk);
			return 0;
		}
	}

process:
	if (static_branch_unlikely(&ip6_min_hopcount)) {
		/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
		if (unlikely(hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount))) {
			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
			drop_reason = SKB_DROP_REASON_TCP_MINTTL;
			goto discard_and_relse;
		}
	}

	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
		drop_reason = SKB_DROP_REASON_XFRM_POLICY;
		goto discard_and_relse;
	}

	drop_reason = tcp_inbound_hash(sk, NULL, skb, &hdr->saddr, &hdr->daddr,
				       AF_INET6, dif, sdif);
	if (drop_reason)
		goto discard_and_relse;

	nf_reset_ct(skb);

	if (tcp_filter(sk, skb, &drop_reason))
		goto discard_and_relse;

	/* tcp_filter() may have trimmed/reallocated; refresh pointers before
	 * filling the control block.
	 */
	th = (const struct tcphdr *)skb->data;
	hdr = ipv6_hdr(skb);
	tcp_v6_fill_cb(skb, hdr, th);

	skb->dev = NULL;

	if (sk->sk_state == TCP_LISTEN) {
		ret = tcp_v6_do_rcv(sk, skb);
		goto put_and_return;
	}

	sk_incoming_cpu_update(sk);

	bh_lock_sock_nested(sk);
	tcp_segs_in(tcp_sk(sk), skb);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
		ret = tcp_v6_do_rcv(sk, skb);
	} else {
		/* Socket is owned by user context: queue on the backlog. */
		if (tcp_add_backlog(sk, skb, &drop_reason))
			goto discard_and_relse;
	}
	bh_unlock_sock(sk);
put_and_return:
	if (refcounted)
		sock_put(sk);
	return ret ? -1 : 0;

no_tcp_socket:
	drop_reason = SKB_DROP_REASON_NO_SOCKET;
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	tcp_v6_fill_cb(skb, hdr, th);

	if (tcp_checksum_complete(skb)) {
csum_error:
		drop_reason = SKB_DROP_REASON_TCP_CSUM;
		trace_tcp_bad_csum(skb);
		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
bad_packet:
		__TCP_INC_STATS(net, TCP_MIB_INERRS);
	} else {
		tcp_v6_send_reset(NULL, skb, sk_rst_convert_drop_reason(drop_reason));
	}

discard_it:
	SKB_DR_OR(drop_reason, NOT_SPECIFIED);
	sk_skb_reason_drop(sk, skb, drop_reason);
	return 0;

discard_and_relse:
	sk_drops_skbadd(sk, skb);
	if (refcounted)
		sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		drop_reason = SKB_DROP_REASON_XFRM_POLICY;
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	tcp_v6_fill_cb(skb, hdr, th);

	if (tcp_checksum_complete(skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto csum_error;
	}

	tw_status = tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn,
					       &drop_reason);
	switch (tw_status) {
	case TCP_TW_SYN:
	{
		struct sock *sk2;

		/* A new SYN on a TIME_WAIT socket: look for a listener that
		 * can take it over and, if found, retire the timewait sock.
		 */
		sk2 = inet6_lookup_listener(net, skb, __tcp_hdrlen(th),
					    &ipv6_hdr(skb)->saddr, th->source,
					    &ipv6_hdr(skb)->daddr,
					    ntohs(th->dest),
					    tcp_v6_iif_l3_slave(skb),
					    sdif);
		if (sk2) {
			struct inet_timewait_sock *tw = inet_twsk(sk);
			inet_twsk_deschedule_put(tw);
			sk = sk2;
			tcp_v6_restore_cb(skb);
			refcounted = false;
			__this_cpu_write(tcp_tw_isn, isn);
			goto process;
		}

		drop_reason = psp_twsk_rx_policy_check(inet_twsk(sk), skb);
		if (drop_reason)
			break;
	}
		/* to ACK */
		fallthrough;
	case TCP_TW_ACK:
	case TCP_TW_ACK_OOW:
		tcp_v6_timewait_ack(sk, skb, tw_status);
		break;
	case TCP_TW_RST:
		tcp_v6_send_reset(sk, skb, SK_RST_REASON_TCP_TIMEWAIT_SOCKET);
		inet_twsk_deschedule_put(inet_twsk(sk));
		goto discard_it;
	case TCP_TW_SUCCESS:
		;
	}
	goto discard_it;
}

/* Early demux: find an established socket for the incoming segment and, if
 * its cached rx dst is still valid for this device, attach it to the skb
 * (noref) so routing can be skipped.
 */
void tcp_v6_early_demux(struct sk_buff *skb)
{
	struct net *net = dev_net_rcu(skb->dev);
	const struct ipv6hdr *hdr;
	const struct tcphdr *th;
	struct sock *sk;

	if (skb->pkt_type != PACKET_HOST)
		return;

	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
		return;

	hdr = ipv6_hdr(skb);
	th = tcp_hdr(skb);

	if (th->doff < sizeof(struct tcphdr) / 4)
		return;

	/* Note : We use inet6_iif() here, not tcp_v6_iif() */
	sk = __inet6_lookup_established(net, &hdr->saddr, th->source,
					&hdr->daddr, ntohs(th->dest),
					inet6_iif(skb), inet6_sdif(skb));
	if (sk) {
		skb->sk = sk;
		skb->destructor = sock_edemux;
		if (sk_fullsock(sk)) {
			struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);

			if (dst)
				dst = dst_check(dst, sk->sk_rx_dst_cookie);
			if (dst &&
			    sk->sk_rx_dst_ifindex == skb->skb_iif)
				skb_dst_set_noref(skb, dst);
		}
	}
}

static struct timewait_sock_ops tcp6_timewait_sock_ops = {
	.twsk_obj_size = sizeof(struct tcp6_timewait_sock),
};

INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff
									       *skb)
{
	/* Checksum an outgoing segment using the socket's bound addresses. */
	__tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr);
}

const struct inet_connection_sock_af_ops ipv6_specific = {
	.queue_xmit	= inet6_csk_xmit,
	.send_check	= tcp_v6_send_check,
	.rebuild_header	= inet6_sk_rebuild_header,
	.sk_rx_dst_set	= inet6_sk_rx_dst_set,
	.conn_request	= tcp_v6_conn_request,
	.syn_recv_sock	= tcp_v6_syn_recv_sock,
	.net_header_len	= sizeof(struct ipv6hdr),
	.setsockopt	= ipv6_setsockopt,
	.getsockopt	= ipv6_getsockopt,
	.mtu_reduced	= tcp_v6_mtu_reduced,
};

#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
#ifdef CONFIG_TCP_MD5SIG
	.md5_lookup	= tcp_v6_md5_lookup,
	.calc_md5_hash	= tcp_v6_md5_hash_skb,
	.md5_parse	= tcp_v6_parse_md5_keys,
#endif
#ifdef CONFIG_TCP_AO
	.ao_lookup	= tcp_v6_ao_lookup,
	.calc_ao_hash	= tcp_v6_ao_hash_skb,
	.ao_parse	= tcp_v6_parse_ao,
	.ao_calc_key_sk	= tcp_v6_ao_calc_key_sk,
#endif
};
#endif

/*
 *	TCP over IPv4 via INET6 API
 */
static const struct inet_connection_sock_af_ops ipv6_mapped = {
	.queue_xmit	= ip_queue_xmit,
	.send_check	= tcp_v4_send_check,
	.rebuild_header	= inet_sk_rebuild_header,
	.sk_rx_dst_set	= inet_sk_rx_dst_set,
	.conn_request	= tcp_v6_conn_request,
	.syn_recv_sock	= tcp_v6_syn_recv_sock,
	.net_header_len	= sizeof(struct iphdr),
	.setsockopt	= ipv6_setsockopt,
	.getsockopt	= ipv6_getsockopt,
	.mtu_reduced	= tcp_v4_mtu_reduced,
};

#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
#ifdef CONFIG_TCP_MD5SIG
	.md5_lookup	= tcp_v4_md5_lookup,
	.calc_md5_hash	= tcp_v4_md5_hash_skb,
	.md5_parse	= tcp_v6_parse_md5_keys,
#endif
#ifdef CONFIG_TCP_AO
	.ao_lookup	= tcp_v6_ao_lookup,
	.calc_ao_hash	= tcp_v4_ao_hash_skb,
	.ao_parse	= tcp_v6_parse_ao,
	.ao_calc_key_sk	= tcp_v4_ao_calc_key_sk,
#endif
};

/* Destructor used when MD5/AO support is built: release signing state,
 * then fall through to the generic IPv6 socket destructor.
 */
static void tcp6_destruct_sock(struct sock *sk)
{
	tcp_md5_destruct_sock(sk);
	tcp_ao_destroy_sock(sk, false);
	inet6_sock_destruct(sk);
}
#endif

/* NOTE: A lot of things set to zero explicitly by call to
 *       sk_alloc() so need not be done here.
 */
static int tcp_v6_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	tcp_init_sock(sk);

	icsk->icsk_af_ops = &ipv6_specific;

#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
	sk->sk_destruct = tcp6_destruct_sock;
#endif

	return 0;
}

#ifdef CONFIG_PROC_FS
/* Proc filesystem TCPv6 sock list dumping. */
static void get_openreq6(struct seq_file *seq,
			 const struct request_sock *req, int i)
{
	long ttd = req->rsk_timer.expires - jiffies;
	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;

	if (ttd < 0)
		ttd = 0;

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3],
		   inet_rsk(req)->ir_num,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3],
		   ntohs(inet_rsk(req)->ir_rmt_port),
		   TCP_SYN_RECV,
		   0, 0, /* could print option size, but that is af dependent. */
		   1,   /* timers active (only the expire timer) */
		   jiffies_to_clock_t(ttd),
		   req->num_timeout,
		   from_kuid_munged(seq_user_ns(seq),
				    sk_uid(req->rsk_listener)),
		   0,  /* non standard timer */
		   0, /* open_requests have no inode */
		   0, req);
}

/* Dump one full TCP socket in /proc/net/tcp6 format. */
static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int timer_active;
	unsigned long timer_expires;
	const struct inet_sock *inet = inet_sk(sp);
	const struct tcp_sock *tp = tcp_sk(sp);
	const struct inet_connection_sock *icsk = inet_csk(sp);
	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
	u8 icsk_pending;
	int rx_queue;
	int state;

	dest  = &sp->sk_v6_daddr;
	src   = &sp->sk_v6_rcv_saddr;
	destp = ntohs(inet->inet_dport);
	srcp  = ntohs(inet->inet_sport);

	icsk_pending = smp_load_acquire(&icsk->icsk_pending);
	if (icsk_pending == ICSK_TIME_RETRANS ||
	    icsk_pending == ICSK_TIME_REO_TIMEOUT ||
	    icsk_pending == ICSK_TIME_LOSS_PROBE) {
		timer_active	= 1;
		timer_expires	= tcp_timeout_expires(sp);
	} else if (icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= tcp_timeout_expires(sp);
	} else if (timer_pending(&icsk->icsk_keepalive_timer)) {
		timer_active	= 2;
		timer_expires	= icsk->icsk_keepalive_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires	= jiffies;
	}

	state = inet_sk_state_load(sp);
	if (state == TCP_LISTEN)
		rx_queue = READ_ONCE(sp->sk_ack_backlog);
	else
		/* Because we don't lock the socket,
		 * we might find a transient negative value.
		 */
		rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
				      READ_ONCE(tp->copied_seq), 0);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   state,
		   READ_ONCE(tp->write_seq) - tp->snd_una,
		   rx_queue,
		   timer_active,
		   jiffies_delta_to_clock_t(timer_expires - jiffies),
		   READ_ONCE(icsk->icsk_retransmits),
		   from_kuid_munged(seq_user_ns(seq), sk_uid(sp)),
		   READ_ONCE(icsk->icsk_probes_out),
		   sock_i_ino(sp),
		   refcount_read(&sp->sk_refcnt), sp,
		   jiffies_to_clock_t(icsk->icsk_rto),
		   jiffies_to_clock_t(icsk->icsk_ack.ato),
		   (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
		   tcp_snd_cwnd(tp),
		   state == TCP_LISTEN ?
			fastopenq->max_qlen :
			(tcp_in_initial_slowstart(tp) ?
-1 : tp->snd_ssthresh) 2201 ); 2202 } 2203 2204 static void get_timewait6_sock(struct seq_file *seq, 2205 struct inet_timewait_sock *tw, int i) 2206 { 2207 long delta = tw->tw_timer.expires - jiffies; 2208 const struct in6_addr *dest, *src; 2209 __u16 destp, srcp; 2210 2211 dest = &tw->tw_v6_daddr; 2212 src = &tw->tw_v6_rcv_saddr; 2213 destp = ntohs(tw->tw_dport); 2214 srcp = ntohs(tw->tw_sport); 2215 2216 seq_printf(seq, 2217 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " 2218 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n", 2219 i, 2220 src->s6_addr32[0], src->s6_addr32[1], 2221 src->s6_addr32[2], src->s6_addr32[3], srcp, 2222 dest->s6_addr32[0], dest->s6_addr32[1], 2223 dest->s6_addr32[2], dest->s6_addr32[3], destp, 2224 READ_ONCE(tw->tw_substate), 0, 0, 2225 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0, 2226 refcount_read(&tw->tw_refcnt), tw); 2227 } 2228 2229 static int tcp6_seq_show(struct seq_file *seq, void *v) 2230 { 2231 struct tcp_iter_state *st; 2232 struct sock *sk = v; 2233 2234 if (v == SEQ_START_TOKEN) { 2235 seq_puts(seq, 2236 " sl " 2237 "local_address " 2238 "remote_address " 2239 "st tx_queue rx_queue tr tm->when retrnsmt" 2240 " uid timeout inode\n"); 2241 goto out; 2242 } 2243 st = seq->private; 2244 2245 if (sk->sk_state == TCP_TIME_WAIT) 2246 get_timewait6_sock(seq, v, st->num); 2247 else if (sk->sk_state == TCP_NEW_SYN_RECV) 2248 get_openreq6(seq, v, st->num); 2249 else 2250 get_tcp6_sock(seq, v, st->num); 2251 out: 2252 return 0; 2253 } 2254 2255 static const struct seq_operations tcp6_seq_ops = { 2256 .show = tcp6_seq_show, 2257 .start = tcp_seq_start, 2258 .next = tcp_seq_next, 2259 .stop = tcp_seq_stop, 2260 }; 2261 2262 static struct tcp_seq_afinfo tcp6_seq_afinfo = { 2263 .family = AF_INET6, 2264 }; 2265 2266 int __net_init tcp6_proc_init(struct net *net) 2267 { 2268 if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops, 2269 sizeof(struct tcp_iter_state), &tcp6_seq_afinfo)) 2270 return -ENOMEM; 2271 return 
0; 2272 } 2273 2274 void tcp6_proc_exit(struct net *net) 2275 { 2276 remove_proc_entry("tcp6", net->proc_net); 2277 } 2278 #endif 2279 2280 struct proto tcpv6_prot = { 2281 .name = "TCPv6", 2282 .owner = THIS_MODULE, 2283 .close = tcp_close, 2284 .pre_connect = tcp_v6_pre_connect, 2285 .connect = tcp_v6_connect, 2286 .disconnect = tcp_disconnect, 2287 .accept = inet_csk_accept, 2288 .ioctl = tcp_ioctl, 2289 .init = tcp_v6_init_sock, 2290 .destroy = tcp_v4_destroy_sock, 2291 .shutdown = tcp_shutdown, 2292 .setsockopt = tcp_setsockopt, 2293 .getsockopt = tcp_getsockopt, 2294 .bpf_bypass_getsockopt = tcp_bpf_bypass_getsockopt, 2295 .keepalive = tcp_set_keepalive, 2296 .recvmsg = tcp_recvmsg, 2297 .sendmsg = tcp_sendmsg, 2298 .splice_eof = tcp_splice_eof, 2299 .backlog_rcv = tcp_v6_do_rcv, 2300 .release_cb = tcp_release_cb, 2301 .hash = inet_hash, 2302 .unhash = inet_unhash, 2303 .get_port = inet_csk_get_port, 2304 .put_port = inet_put_port, 2305 #ifdef CONFIG_BPF_SYSCALL 2306 .psock_update_sk_prot = tcp_bpf_update_proto, 2307 #endif 2308 .enter_memory_pressure = tcp_enter_memory_pressure, 2309 .leave_memory_pressure = tcp_leave_memory_pressure, 2310 .stream_memory_free = tcp_stream_memory_free, 2311 .sockets_allocated = &tcp_sockets_allocated, 2312 2313 .memory_allocated = &net_aligned_data.tcp_memory_allocated, 2314 .per_cpu_fw_alloc = &tcp_memory_per_cpu_fw_alloc, 2315 2316 .memory_pressure = &tcp_memory_pressure, 2317 .sysctl_mem = sysctl_tcp_mem, 2318 .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem), 2319 .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem), 2320 .max_header = MAX_TCP_HEADER, 2321 .obj_size = sizeof(struct tcp6_sock), 2322 .freeptr_offset = offsetof(struct tcp6_sock, 2323 tcp.inet_conn.icsk_inet.sk.sk_freeptr), 2324 .ipv6_pinfo_offset = offsetof(struct tcp6_sock, inet6), 2325 .slab_flags = SLAB_TYPESAFE_BY_RCU, 2326 .twsk_prot = &tcp6_timewait_sock_ops, 2327 .rsk_prot = &tcp6_request_sock_ops, 2328 .h.hashinfo = NULL, 
2329 .no_autobind = true, 2330 .diag_destroy = tcp_abort, 2331 }; 2332 EXPORT_SYMBOL_GPL(tcpv6_prot); 2333 2334 2335 static struct inet_protosw tcpv6_protosw = { 2336 .type = SOCK_STREAM, 2337 .protocol = IPPROTO_TCP, 2338 .prot = &tcpv6_prot, 2339 .ops = &inet6_stream_ops, 2340 .flags = INET_PROTOSW_PERMANENT | 2341 INET_PROTOSW_ICSK, 2342 }; 2343 2344 static int __net_init tcpv6_net_init(struct net *net) 2345 { 2346 int res; 2347 2348 res = inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6, 2349 SOCK_RAW, IPPROTO_TCP, net); 2350 if (!res) 2351 net->ipv6.tcp_sk->sk_clockid = CLOCK_MONOTONIC; 2352 2353 return res; 2354 } 2355 2356 static void __net_exit tcpv6_net_exit(struct net *net) 2357 { 2358 inet_ctl_sock_destroy(net->ipv6.tcp_sk); 2359 } 2360 2361 static struct pernet_operations tcpv6_net_ops = { 2362 .init = tcpv6_net_init, 2363 .exit = tcpv6_net_exit, 2364 }; 2365 2366 int __init tcpv6_init(void) 2367 { 2368 int ret; 2369 2370 net_hotdata.tcpv6_protocol = (struct inet6_protocol) { 2371 .handler = tcp_v6_rcv, 2372 .err_handler = tcp_v6_err, 2373 .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_FINAL, 2374 }; 2375 ret = inet6_add_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP); 2376 if (ret) 2377 goto out; 2378 2379 /* register inet6 protocol */ 2380 ret = inet6_register_protosw(&tcpv6_protosw); 2381 if (ret) 2382 goto out_tcpv6_protocol; 2383 2384 ret = register_pernet_subsys(&tcpv6_net_ops); 2385 if (ret) 2386 goto out_tcpv6_protosw; 2387 2388 ret = mptcpv6_init(); 2389 if (ret) 2390 goto out_tcpv6_pernet_subsys; 2391 2392 out: 2393 return ret; 2394 2395 out_tcpv6_pernet_subsys: 2396 unregister_pernet_subsys(&tcpv6_net_ops); 2397 out_tcpv6_protosw: 2398 inet6_unregister_protosw(&tcpv6_protosw); 2399 out_tcpv6_protocol: 2400 inet6_del_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP); 2401 goto out; 2402 } 2403 2404 void tcpv6_exit(void) 2405 { 2406 unregister_pernet_subsys(&tcpv6_net_ops); 2407 inet6_unregister_protosw(&tcpv6_protosw); 2408 
inet6_del_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP); 2409 } 2410