11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * INET An implementation of the TCP/IP protocol suite for the LINUX 31da177e4SLinus Torvalds * operating system. INET is implemented using the BSD Socket 41da177e4SLinus Torvalds * interface as the means of communication with the user level. 51da177e4SLinus Torvalds * 61da177e4SLinus Torvalds * Implementation of the Transmission Control Protocol(TCP). 71da177e4SLinus Torvalds * 81da177e4SLinus Torvalds * IPv4 specific functions 91da177e4SLinus Torvalds * 101da177e4SLinus Torvalds * 111da177e4SLinus Torvalds * code split from: 121da177e4SLinus Torvalds * linux/ipv4/tcp.c 131da177e4SLinus Torvalds * linux/ipv4/tcp_input.c 141da177e4SLinus Torvalds * linux/ipv4/tcp_output.c 151da177e4SLinus Torvalds * 161da177e4SLinus Torvalds * See tcp.c for author information 171da177e4SLinus Torvalds * 181da177e4SLinus Torvalds * This program is free software; you can redistribute it and/or 191da177e4SLinus Torvalds * modify it under the terms of the GNU General Public License 201da177e4SLinus Torvalds * as published by the Free Software Foundation; either version 211da177e4SLinus Torvalds * 2 of the License, or (at your option) any later version. 221da177e4SLinus Torvalds */ 231da177e4SLinus Torvalds 241da177e4SLinus Torvalds /* 251da177e4SLinus Torvalds * Changes: 261da177e4SLinus Torvalds * David S. Miller : New socket lookup architecture. 271da177e4SLinus Torvalds * This code is dedicated to John Dyson. 281da177e4SLinus Torvalds * David S. Miller : Change semantics of established hash, 291da177e4SLinus Torvalds * half is devoted to TIME_WAIT sockets 301da177e4SLinus Torvalds * and the rest go in the other half. 311da177e4SLinus Torvalds * Andi Kleen : Add support for syncookies and fixed 321da177e4SLinus Torvalds * some bugs: ip options weren't passed to 331da177e4SLinus Torvalds * the TCP layer, missed a check for an 341da177e4SLinus Torvalds * ACK bit. 
351da177e4SLinus Torvalds * Andi Kleen : Implemented fast path mtu discovery. 361da177e4SLinus Torvalds * Fixed many serious bugs in the 3760236fddSArnaldo Carvalho de Melo * request_sock handling and moved 381da177e4SLinus Torvalds * most of it into the af independent code. 391da177e4SLinus Torvalds * Added tail drop and some other bugfixes. 40caa20d9aSStephen Hemminger * Added new listen semantics. 411da177e4SLinus Torvalds * Mike McLagan : Routing by source 421da177e4SLinus Torvalds * Juan Jose Ciarlante: ip_dynaddr bits 431da177e4SLinus Torvalds * Andi Kleen: various fixes. 441da177e4SLinus Torvalds * Vitaly E. Lavrov : Transparent proxy revived after year 451da177e4SLinus Torvalds * coma. 461da177e4SLinus Torvalds * Andi Kleen : Fix new listen. 471da177e4SLinus Torvalds * Andi Kleen : Fix accept error reporting. 481da177e4SLinus Torvalds * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which 491da177e4SLinus Torvalds * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind 501da177e4SLinus Torvalds * a single port at the same time. 511da177e4SLinus Torvalds */ 521da177e4SLinus Torvalds 53afd46503SJoe Perches #define pr_fmt(fmt) "TCP: " fmt 541da177e4SLinus Torvalds 55eb4dea58SHerbert Xu #include <linux/bottom_half.h> 561da177e4SLinus Torvalds #include <linux/types.h> 571da177e4SLinus Torvalds #include <linux/fcntl.h> 581da177e4SLinus Torvalds #include <linux/module.h> 591da177e4SLinus Torvalds #include <linux/random.h> 601da177e4SLinus Torvalds #include <linux/cache.h> 611da177e4SLinus Torvalds #include <linux/jhash.h> 621da177e4SLinus Torvalds #include <linux/init.h> 631da177e4SLinus Torvalds #include <linux/times.h> 645a0e3ad6STejun Heo #include <linux/slab.h> 651da177e4SLinus Torvalds 66457c4cbcSEric W. 
Biederman #include <net/net_namespace.h> 671da177e4SLinus Torvalds #include <net/icmp.h> 68304a1618SArnaldo Carvalho de Melo #include <net/inet_hashtables.h> 691da177e4SLinus Torvalds #include <net/tcp.h> 7020380731SArnaldo Carvalho de Melo #include <net/transp_v6.h> 711da177e4SLinus Torvalds #include <net/ipv6.h> 721da177e4SLinus Torvalds #include <net/inet_common.h> 736d6ee43eSArnaldo Carvalho de Melo #include <net/timewait_sock.h> 741da177e4SLinus Torvalds #include <net/xfrm.h> 751a2449a8SChris Leech #include <net/netdma.h> 766e5714eaSDavid S. Miller #include <net/secure_seq.h> 77d1a4c0b3SGlauber Costa #include <net/tcp_memcontrol.h> 781da177e4SLinus Torvalds 791da177e4SLinus Torvalds #include <linux/inet.h> 801da177e4SLinus Torvalds #include <linux/ipv6.h> 811da177e4SLinus Torvalds #include <linux/stddef.h> 821da177e4SLinus Torvalds #include <linux/proc_fs.h> 831da177e4SLinus Torvalds #include <linux/seq_file.h> 841da177e4SLinus Torvalds 85cfb6eeb4SYOSHIFUJI Hideaki #include <linux/crypto.h> 86cfb6eeb4SYOSHIFUJI Hideaki #include <linux/scatterlist.h> 87cfb6eeb4SYOSHIFUJI Hideaki 88ab32ea5dSBrian Haley int sysctl_tcp_tw_reuse __read_mostly; 89ab32ea5dSBrian Haley int sysctl_tcp_low_latency __read_mostly; 904bc2f18bSEric Dumazet EXPORT_SYMBOL(sysctl_tcp_low_latency); 911da177e4SLinus Torvalds 921da177e4SLinus Torvalds 93cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 94a915da9bSEric Dumazet static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, 95318cf7aaSEric Dumazet __be32 daddr, __be32 saddr, const struct tcphdr *th); 96cfb6eeb4SYOSHIFUJI Hideaki #endif 97cfb6eeb4SYOSHIFUJI Hideaki 985caea4eaSEric Dumazet struct inet_hashinfo tcp_hashinfo; 994bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_hashinfo); 1001da177e4SLinus Torvalds 101cf533ea5SEric Dumazet static inline __u32 tcp_v4_init_sequence(const struct sk_buff *skb) 1021da177e4SLinus Torvalds { 103eddc9ec5SArnaldo Carvalho de Melo return secure_tcp_sequence_number(ip_hdr(skb)->daddr, 
104eddc9ec5SArnaldo Carvalho de Melo ip_hdr(skb)->saddr, 105aa8223c7SArnaldo Carvalho de Melo tcp_hdr(skb)->dest, 106aa8223c7SArnaldo Carvalho de Melo tcp_hdr(skb)->source); 1071da177e4SLinus Torvalds } 1081da177e4SLinus Torvalds 1096d6ee43eSArnaldo Carvalho de Melo int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) 1106d6ee43eSArnaldo Carvalho de Melo { 1116d6ee43eSArnaldo Carvalho de Melo const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw); 1126d6ee43eSArnaldo Carvalho de Melo struct tcp_sock *tp = tcp_sk(sk); 1136d6ee43eSArnaldo Carvalho de Melo 1146d6ee43eSArnaldo Carvalho de Melo /* With PAWS, it is safe from the viewpoint 1156d6ee43eSArnaldo Carvalho de Melo of data integrity. Even without PAWS it is safe provided sequence 1166d6ee43eSArnaldo Carvalho de Melo spaces do not overlap i.e. at data rates <= 80Mbit/sec. 1176d6ee43eSArnaldo Carvalho de Melo 1186d6ee43eSArnaldo Carvalho de Melo Actually, the idea is close to VJ's one, only timestamp cache is 1196d6ee43eSArnaldo Carvalho de Melo held not per host, but per port pair and TW bucket is used as state 1206d6ee43eSArnaldo Carvalho de Melo holder. 1216d6ee43eSArnaldo Carvalho de Melo 1226d6ee43eSArnaldo Carvalho de Melo If TW bucket has been already destroyed we fall back to VJ's scheme 1236d6ee43eSArnaldo Carvalho de Melo and use initial timestamp retrieved from peer table. 
1246d6ee43eSArnaldo Carvalho de Melo */ 1256d6ee43eSArnaldo Carvalho de Melo if (tcptw->tw_ts_recent_stamp && 1266d6ee43eSArnaldo Carvalho de Melo (twp == NULL || (sysctl_tcp_tw_reuse && 1279d729f72SJames Morris get_seconds() - tcptw->tw_ts_recent_stamp > 1))) { 1286d6ee43eSArnaldo Carvalho de Melo tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2; 1296d6ee43eSArnaldo Carvalho de Melo if (tp->write_seq == 0) 1306d6ee43eSArnaldo Carvalho de Melo tp->write_seq = 1; 1316d6ee43eSArnaldo Carvalho de Melo tp->rx_opt.ts_recent = tcptw->tw_ts_recent; 1326d6ee43eSArnaldo Carvalho de Melo tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; 1336d6ee43eSArnaldo Carvalho de Melo sock_hold(sktw); 1346d6ee43eSArnaldo Carvalho de Melo return 1; 1356d6ee43eSArnaldo Carvalho de Melo } 1366d6ee43eSArnaldo Carvalho de Melo 1376d6ee43eSArnaldo Carvalho de Melo return 0; 1386d6ee43eSArnaldo Carvalho de Melo } 1396d6ee43eSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(tcp_twsk_unique); 1406d6ee43eSArnaldo Carvalho de Melo 141ee995283SPavel Emelyanov static int tcp_repair_connect(struct sock *sk) 142ee995283SPavel Emelyanov { 143ee995283SPavel Emelyanov tcp_connect_init(sk); 144ee995283SPavel Emelyanov tcp_finish_connect(sk, NULL); 145ee995283SPavel Emelyanov 146ee995283SPavel Emelyanov return 0; 147ee995283SPavel Emelyanov } 148ee995283SPavel Emelyanov 1491da177e4SLinus Torvalds /* This will initiate an outgoing connection. */ 1501da177e4SLinus Torvalds int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) 1511da177e4SLinus Torvalds { 1522d7192d6SDavid S. Miller struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; 1531da177e4SLinus Torvalds struct inet_sock *inet = inet_sk(sk); 1541da177e4SLinus Torvalds struct tcp_sock *tp = tcp_sk(sk); 155dca8b089SDavid S. Miller __be16 orig_sport, orig_dport; 156bada8adcSAl Viro __be32 daddr, nexthop; 157da905bd1SDavid S. Miller struct flowi4 *fl4; 1582d7192d6SDavid S. 
Miller struct rtable *rt; 1591da177e4SLinus Torvalds int err; 160f6d8bd05SEric Dumazet struct ip_options_rcu *inet_opt; 1611da177e4SLinus Torvalds 1621da177e4SLinus Torvalds if (addr_len < sizeof(struct sockaddr_in)) 1631da177e4SLinus Torvalds return -EINVAL; 1641da177e4SLinus Torvalds 1651da177e4SLinus Torvalds if (usin->sin_family != AF_INET) 1661da177e4SLinus Torvalds return -EAFNOSUPPORT; 1671da177e4SLinus Torvalds 1681da177e4SLinus Torvalds nexthop = daddr = usin->sin_addr.s_addr; 169f6d8bd05SEric Dumazet inet_opt = rcu_dereference_protected(inet->inet_opt, 170f6d8bd05SEric Dumazet sock_owned_by_user(sk)); 171f6d8bd05SEric Dumazet if (inet_opt && inet_opt->opt.srr) { 1721da177e4SLinus Torvalds if (!daddr) 1731da177e4SLinus Torvalds return -EINVAL; 174f6d8bd05SEric Dumazet nexthop = inet_opt->opt.faddr; 1751da177e4SLinus Torvalds } 1761da177e4SLinus Torvalds 177dca8b089SDavid S. Miller orig_sport = inet->inet_sport; 178dca8b089SDavid S. Miller orig_dport = usin->sin_port; 179da905bd1SDavid S. Miller fl4 = &inet->cork.fl.u.ip4; 180da905bd1SDavid S. Miller rt = ip_route_connect(fl4, nexthop, inet->inet_saddr, 1811da177e4SLinus Torvalds RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, 1821da177e4SLinus Torvalds IPPROTO_TCP, 183abdf7e72SDavid S. Miller orig_sport, orig_dport, sk, true); 184b23dd4feSDavid S. Miller if (IS_ERR(rt)) { 185b23dd4feSDavid S. Miller err = PTR_ERR(rt); 186b23dd4feSDavid S. Miller if (err == -ENETUNREACH) 1877c73a6faSPavel Emelyanov IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); 188b23dd4feSDavid S. Miller return err; 189584bdf8cSWei Dong } 1901da177e4SLinus Torvalds 1911da177e4SLinus Torvalds if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) { 1921da177e4SLinus Torvalds ip_rt_put(rt); 1931da177e4SLinus Torvalds return -ENETUNREACH; 1941da177e4SLinus Torvalds } 1951da177e4SLinus Torvalds 196f6d8bd05SEric Dumazet if (!inet_opt || !inet_opt->opt.srr) 197da905bd1SDavid S. 
Miller daddr = fl4->daddr; 1981da177e4SLinus Torvalds 199c720c7e8SEric Dumazet if (!inet->inet_saddr) 200da905bd1SDavid S. Miller inet->inet_saddr = fl4->saddr; 201c720c7e8SEric Dumazet inet->inet_rcv_saddr = inet->inet_saddr; 2021da177e4SLinus Torvalds 203c720c7e8SEric Dumazet if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) { 2041da177e4SLinus Torvalds /* Reset inherited state */ 2051da177e4SLinus Torvalds tp->rx_opt.ts_recent = 0; 2061da177e4SLinus Torvalds tp->rx_opt.ts_recent_stamp = 0; 207ee995283SPavel Emelyanov if (likely(!tp->repair)) 2081da177e4SLinus Torvalds tp->write_seq = 0; 2091da177e4SLinus Torvalds } 2101da177e4SLinus Torvalds 211295ff7edSArnaldo Carvalho de Melo if (tcp_death_row.sysctl_tw_recycle && 21281166dd6SDavid S. Miller !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr) 21381166dd6SDavid S. Miller tcp_fetch_timewait_stamp(sk, &rt->dst); 2141da177e4SLinus Torvalds 215c720c7e8SEric Dumazet inet->inet_dport = usin->sin_port; 216c720c7e8SEric Dumazet inet->inet_daddr = daddr; 2171da177e4SLinus Torvalds 218d83d8461SArnaldo Carvalho de Melo inet_csk(sk)->icsk_ext_hdr_len = 0; 219f6d8bd05SEric Dumazet if (inet_opt) 220f6d8bd05SEric Dumazet inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen; 2211da177e4SLinus Torvalds 222bee7ca9eSWilliam Allen Simpson tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT; 2231da177e4SLinus Torvalds 2241da177e4SLinus Torvalds /* Socket identity is still unknown (sport may be zero). 2251da177e4SLinus Torvalds * However we set state to SYN-SENT and not releasing socket 2261da177e4SLinus Torvalds * lock select source port, enter ourselves into the hash tables and 2271da177e4SLinus Torvalds * complete initialization after this. 
2281da177e4SLinus Torvalds */ 2291da177e4SLinus Torvalds tcp_set_state(sk, TCP_SYN_SENT); 230a7f5e7f1SArnaldo Carvalho de Melo err = inet_hash_connect(&tcp_death_row, sk); 2311da177e4SLinus Torvalds if (err) 2321da177e4SLinus Torvalds goto failure; 2331da177e4SLinus Torvalds 234da905bd1SDavid S. Miller rt = ip_route_newports(fl4, rt, orig_sport, orig_dport, 235c720c7e8SEric Dumazet inet->inet_sport, inet->inet_dport, sk); 236b23dd4feSDavid S. Miller if (IS_ERR(rt)) { 237b23dd4feSDavid S. Miller err = PTR_ERR(rt); 238b23dd4feSDavid S. Miller rt = NULL; 2391da177e4SLinus Torvalds goto failure; 240b23dd4feSDavid S. Miller } 2411da177e4SLinus Torvalds /* OK, now commit destination to socket. */ 242bcd76111SHerbert Xu sk->sk_gso_type = SKB_GSO_TCPV4; 243d8d1f30bSChangli Gao sk_setup_caps(sk, &rt->dst); 2441da177e4SLinus Torvalds 245ee995283SPavel Emelyanov if (!tp->write_seq && likely(!tp->repair)) 246c720c7e8SEric Dumazet tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr, 247c720c7e8SEric Dumazet inet->inet_daddr, 248c720c7e8SEric Dumazet inet->inet_sport, 2491da177e4SLinus Torvalds usin->sin_port); 2501da177e4SLinus Torvalds 251c720c7e8SEric Dumazet inet->inet_id = tp->write_seq ^ jiffies; 2521da177e4SLinus Torvalds 253ee995283SPavel Emelyanov if (likely(!tp->repair)) 2541da177e4SLinus Torvalds err = tcp_connect(sk); 255ee995283SPavel Emelyanov else 256ee995283SPavel Emelyanov err = tcp_repair_connect(sk); 257ee995283SPavel Emelyanov 2581da177e4SLinus Torvalds rt = NULL; 2591da177e4SLinus Torvalds if (err) 2601da177e4SLinus Torvalds goto failure; 2611da177e4SLinus Torvalds 2621da177e4SLinus Torvalds return 0; 2631da177e4SLinus Torvalds 2641da177e4SLinus Torvalds failure: 2657174259eSArnaldo Carvalho de Melo /* 2667174259eSArnaldo Carvalho de Melo * This unhashes the socket and releases the local port, 2677174259eSArnaldo Carvalho de Melo * if necessary. 
2687174259eSArnaldo Carvalho de Melo */ 2691da177e4SLinus Torvalds tcp_set_state(sk, TCP_CLOSE); 2701da177e4SLinus Torvalds ip_rt_put(rt); 2711da177e4SLinus Torvalds sk->sk_route_caps = 0; 272c720c7e8SEric Dumazet inet->inet_dport = 0; 2731da177e4SLinus Torvalds return err; 2741da177e4SLinus Torvalds } 2754bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_connect); 2761da177e4SLinus Torvalds 2771da177e4SLinus Torvalds /* 278563d34d0SEric Dumazet * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191. 279563d34d0SEric Dumazet * It can be called through tcp_release_cb() if socket was owned by user 280563d34d0SEric Dumazet * at the time tcp_v4_err() was called to handle ICMP message. 2811da177e4SLinus Torvalds */ 282563d34d0SEric Dumazet static void tcp_v4_mtu_reduced(struct sock *sk) 2831da177e4SLinus Torvalds { 2841da177e4SLinus Torvalds struct dst_entry *dst; 2851da177e4SLinus Torvalds struct inet_sock *inet = inet_sk(sk); 286563d34d0SEric Dumazet u32 mtu = tcp_sk(sk)->mtu_info; 2871da177e4SLinus Torvalds 2881da177e4SLinus Torvalds /* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs 2891da177e4SLinus Torvalds * send out by Linux are always <576bytes so they should go through 2901da177e4SLinus Torvalds * unfragmented). 2911da177e4SLinus Torvalds */ 2921da177e4SLinus Torvalds if (sk->sk_state == TCP_LISTEN) 2931da177e4SLinus Torvalds return; 2941da177e4SLinus Torvalds 29580d0a69fSDavid S. Miller dst = inet_csk_update_pmtu(sk, mtu); 29680d0a69fSDavid S. Miller if (!dst) 2971da177e4SLinus Torvalds return; 2981da177e4SLinus Torvalds 2991da177e4SLinus Torvalds /* Something is about to be wrong... Remember soft error 3001da177e4SLinus Torvalds * for the case, if this connection will not able to recover. 
3011da177e4SLinus Torvalds */ 3021da177e4SLinus Torvalds if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst)) 3031da177e4SLinus Torvalds sk->sk_err_soft = EMSGSIZE; 3041da177e4SLinus Torvalds 3051da177e4SLinus Torvalds mtu = dst_mtu(dst); 3061da177e4SLinus Torvalds 3071da177e4SLinus Torvalds if (inet->pmtudisc != IP_PMTUDISC_DONT && 308d83d8461SArnaldo Carvalho de Melo inet_csk(sk)->icsk_pmtu_cookie > mtu) { 3091da177e4SLinus Torvalds tcp_sync_mss(sk, mtu); 3101da177e4SLinus Torvalds 3111da177e4SLinus Torvalds /* Resend the TCP packet because it's 3121da177e4SLinus Torvalds * clear that the old packet has been 3131da177e4SLinus Torvalds * dropped. This is the new "fast" path mtu 3141da177e4SLinus Torvalds * discovery. 3151da177e4SLinus Torvalds */ 3161da177e4SLinus Torvalds tcp_simple_retransmit(sk); 3171da177e4SLinus Torvalds } /* else let the usual retransmit timer handle it */ 3181da177e4SLinus Torvalds } 3191da177e4SLinus Torvalds 32055be7a9cSDavid S. Miller static void do_redirect(struct sk_buff *skb, struct sock *sk) 32155be7a9cSDavid S. Miller { 32255be7a9cSDavid S. Miller struct dst_entry *dst = __sk_dst_check(sk, 0); 32355be7a9cSDavid S. Miller 3241ed5c48fSDavid S. Miller if (dst) 3256700c270SDavid S. Miller dst->ops->redirect(dst, sk, skb); 32655be7a9cSDavid S. Miller } 32755be7a9cSDavid S. Miller 3281da177e4SLinus Torvalds /* 3291da177e4SLinus Torvalds * This routine is called by the ICMP module when it gets some 3301da177e4SLinus Torvalds * sort of error condition. If err < 0 then the socket should 3311da177e4SLinus Torvalds * be closed and the error returned to the user. If err > 0 3321da177e4SLinus Torvalds * it's just the icmp type << 8 | icmp code. After adjustment 3331da177e4SLinus Torvalds * header points to the first 8 bytes of the tcp header. We need 3341da177e4SLinus Torvalds * to find the appropriate port. 3351da177e4SLinus Torvalds * 3361da177e4SLinus Torvalds * The locking strategy used here is very "optimistic". 
When 3371da177e4SLinus Torvalds * someone else accesses the socket the ICMP is just dropped 3381da177e4SLinus Torvalds * and for some paths there is no check at all. 3391da177e4SLinus Torvalds * A more general error queue to queue errors for later handling 3401da177e4SLinus Torvalds * is probably better. 3411da177e4SLinus Torvalds * 3421da177e4SLinus Torvalds */ 3431da177e4SLinus Torvalds 3444d1a2d9eSDamian Lukowski void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) 3451da177e4SLinus Torvalds { 346b71d1d42SEric Dumazet const struct iphdr *iph = (const struct iphdr *)icmp_skb->data; 3474d1a2d9eSDamian Lukowski struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2)); 348f1ecd5d9SDamian Lukowski struct inet_connection_sock *icsk; 3491da177e4SLinus Torvalds struct tcp_sock *tp; 3501da177e4SLinus Torvalds struct inet_sock *inet; 3514d1a2d9eSDamian Lukowski const int type = icmp_hdr(icmp_skb)->type; 3524d1a2d9eSDamian Lukowski const int code = icmp_hdr(icmp_skb)->code; 3531da177e4SLinus Torvalds struct sock *sk; 354f1ecd5d9SDamian Lukowski struct sk_buff *skb; 355168a8f58SJerry Chu struct request_sock *req; 3561da177e4SLinus Torvalds __u32 seq; 357f1ecd5d9SDamian Lukowski __u32 remaining; 3581da177e4SLinus Torvalds int err; 3594d1a2d9eSDamian Lukowski struct net *net = dev_net(icmp_skb->dev); 3601da177e4SLinus Torvalds 3614d1a2d9eSDamian Lukowski if (icmp_skb->len < (iph->ihl << 2) + 8) { 362dcfc23caSPavel Emelyanov ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); 3631da177e4SLinus Torvalds return; 3641da177e4SLinus Torvalds } 3651da177e4SLinus Torvalds 366fd54d716SPavel Emelyanov sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest, 3674d1a2d9eSDamian Lukowski iph->saddr, th->source, inet_iif(icmp_skb)); 3681da177e4SLinus Torvalds if (!sk) { 369dcfc23caSPavel Emelyanov ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); 3701da177e4SLinus Torvalds return; 3711da177e4SLinus Torvalds } 3721da177e4SLinus Torvalds if (sk->sk_state == TCP_TIME_WAIT) { 
3739469c7b4SYOSHIFUJI Hideaki inet_twsk_put(inet_twsk(sk)); 3741da177e4SLinus Torvalds return; 3751da177e4SLinus Torvalds } 3761da177e4SLinus Torvalds 3771da177e4SLinus Torvalds bh_lock_sock(sk); 3781da177e4SLinus Torvalds /* If too many ICMPs get dropped on busy 3791da177e4SLinus Torvalds * servers this needs to be solved differently. 380563d34d0SEric Dumazet * We do take care of PMTU discovery (RFC1191) special case : 381563d34d0SEric Dumazet * we can receive locally generated ICMP messages while socket is held. 3821da177e4SLinus Torvalds */ 383563d34d0SEric Dumazet if (sock_owned_by_user(sk) && 384563d34d0SEric Dumazet type != ICMP_DEST_UNREACH && 385563d34d0SEric Dumazet code != ICMP_FRAG_NEEDED) 386de0744afSPavel Emelyanov NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); 3871da177e4SLinus Torvalds 3881da177e4SLinus Torvalds if (sk->sk_state == TCP_CLOSE) 3891da177e4SLinus Torvalds goto out; 3901da177e4SLinus Torvalds 39197e3ecd1Sstephen hemminger if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) { 39297e3ecd1Sstephen hemminger NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP); 39397e3ecd1Sstephen hemminger goto out; 39497e3ecd1Sstephen hemminger } 39597e3ecd1Sstephen hemminger 396f1ecd5d9SDamian Lukowski icsk = inet_csk(sk); 3971da177e4SLinus Torvalds tp = tcp_sk(sk); 398168a8f58SJerry Chu req = tp->fastopen_rsk; 3991da177e4SLinus Torvalds seq = ntohl(th->seq); 4001da177e4SLinus Torvalds if (sk->sk_state != TCP_LISTEN && 401168a8f58SJerry Chu !between(seq, tp->snd_una, tp->snd_nxt) && 402168a8f58SJerry Chu (req == NULL || seq != tcp_rsk(req)->snt_isn)) { 403168a8f58SJerry Chu /* For a Fast Open socket, allow seq to be snt_isn. */ 404de0744afSPavel Emelyanov NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); 4051da177e4SLinus Torvalds goto out; 4061da177e4SLinus Torvalds } 4071da177e4SLinus Torvalds 4081da177e4SLinus Torvalds switch (type) { 40955be7a9cSDavid S. Miller case ICMP_REDIRECT: 41055be7a9cSDavid S. Miller do_redirect(icmp_skb, sk); 41155be7a9cSDavid S. 
Miller goto out; 4121da177e4SLinus Torvalds case ICMP_SOURCE_QUENCH: 4131da177e4SLinus Torvalds /* Just silently ignore these. */ 4141da177e4SLinus Torvalds goto out; 4151da177e4SLinus Torvalds case ICMP_PARAMETERPROB: 4161da177e4SLinus Torvalds err = EPROTO; 4171da177e4SLinus Torvalds break; 4181da177e4SLinus Torvalds case ICMP_DEST_UNREACH: 4191da177e4SLinus Torvalds if (code > NR_ICMP_UNREACH) 4201da177e4SLinus Torvalds goto out; 4211da177e4SLinus Torvalds 4221da177e4SLinus Torvalds if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */ 423563d34d0SEric Dumazet tp->mtu_info = info; 424144d56e9SEric Dumazet if (!sock_owned_by_user(sk)) { 425563d34d0SEric Dumazet tcp_v4_mtu_reduced(sk); 426144d56e9SEric Dumazet } else { 427144d56e9SEric Dumazet if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags)) 428144d56e9SEric Dumazet sock_hold(sk); 429144d56e9SEric Dumazet } 4301da177e4SLinus Torvalds goto out; 4311da177e4SLinus Torvalds } 4321da177e4SLinus Torvalds 4331da177e4SLinus Torvalds err = icmp_err_convert[code].errno; 434f1ecd5d9SDamian Lukowski /* check if icmp_skb allows revert of backoff 435f1ecd5d9SDamian Lukowski * (see draft-zimmermann-tcp-lcd) */ 436f1ecd5d9SDamian Lukowski if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH) 437f1ecd5d9SDamian Lukowski break; 438f1ecd5d9SDamian Lukowski if (seq != tp->snd_una || !icsk->icsk_retransmits || 439f1ecd5d9SDamian Lukowski !icsk->icsk_backoff) 440f1ecd5d9SDamian Lukowski break; 441f1ecd5d9SDamian Lukowski 442168a8f58SJerry Chu /* XXX (TFO) - revisit the following logic for TFO */ 443168a8f58SJerry Chu 4448f49c270SDavid S. Miller if (sock_owned_by_user(sk)) 4458f49c270SDavid S. Miller break; 4468f49c270SDavid S. Miller 447f1ecd5d9SDamian Lukowski icsk->icsk_backoff--; 4489ad7c049SJerry Chu inet_csk(sk)->icsk_rto = (tp->srtt ? 
__tcp_set_rto(tp) : 4499ad7c049SJerry Chu TCP_TIMEOUT_INIT) << icsk->icsk_backoff; 450f1ecd5d9SDamian Lukowski tcp_bound_rto(sk); 451f1ecd5d9SDamian Lukowski 452f1ecd5d9SDamian Lukowski skb = tcp_write_queue_head(sk); 453f1ecd5d9SDamian Lukowski BUG_ON(!skb); 454f1ecd5d9SDamian Lukowski 455f1ecd5d9SDamian Lukowski remaining = icsk->icsk_rto - min(icsk->icsk_rto, 456f1ecd5d9SDamian Lukowski tcp_time_stamp - TCP_SKB_CB(skb)->when); 457f1ecd5d9SDamian Lukowski 458f1ecd5d9SDamian Lukowski if (remaining) { 459f1ecd5d9SDamian Lukowski inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, 460f1ecd5d9SDamian Lukowski remaining, TCP_RTO_MAX); 461f1ecd5d9SDamian Lukowski } else { 462f1ecd5d9SDamian Lukowski /* RTO revert clocked out retransmission. 463f1ecd5d9SDamian Lukowski * Will retransmit now */ 464f1ecd5d9SDamian Lukowski tcp_retransmit_timer(sk); 465f1ecd5d9SDamian Lukowski } 466f1ecd5d9SDamian Lukowski 4671da177e4SLinus Torvalds break; 4681da177e4SLinus Torvalds case ICMP_TIME_EXCEEDED: 4691da177e4SLinus Torvalds err = EHOSTUNREACH; 4701da177e4SLinus Torvalds break; 4711da177e4SLinus Torvalds default: 4721da177e4SLinus Torvalds goto out; 4731da177e4SLinus Torvalds } 4741da177e4SLinus Torvalds 475168a8f58SJerry Chu /* XXX (TFO) - if it's a TFO socket and has been accepted, rather 476168a8f58SJerry Chu * than following the TCP_SYN_RECV case and closing the socket, 477168a8f58SJerry Chu * we ignore the ICMP error and keep trying like a fully established 478168a8f58SJerry Chu * socket. Is this the right thing to do? 
479168a8f58SJerry Chu */ 480168a8f58SJerry Chu if (req && req->sk == NULL) 481168a8f58SJerry Chu goto out; 482168a8f58SJerry Chu 4831da177e4SLinus Torvalds switch (sk->sk_state) { 48460236fddSArnaldo Carvalho de Melo struct request_sock *req, **prev; 4851da177e4SLinus Torvalds case TCP_LISTEN: 4861da177e4SLinus Torvalds if (sock_owned_by_user(sk)) 4871da177e4SLinus Torvalds goto out; 4881da177e4SLinus Torvalds 489463c84b9SArnaldo Carvalho de Melo req = inet_csk_search_req(sk, &prev, th->dest, 4901da177e4SLinus Torvalds iph->daddr, iph->saddr); 4911da177e4SLinus Torvalds if (!req) 4921da177e4SLinus Torvalds goto out; 4931da177e4SLinus Torvalds 4941da177e4SLinus Torvalds /* ICMPs are not backlogged, hence we cannot get 4951da177e4SLinus Torvalds an established socket here. 4961da177e4SLinus Torvalds */ 497547b792cSIlpo Järvinen WARN_ON(req->sk); 4981da177e4SLinus Torvalds 4992e6599cbSArnaldo Carvalho de Melo if (seq != tcp_rsk(req)->snt_isn) { 500de0744afSPavel Emelyanov NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); 5011da177e4SLinus Torvalds goto out; 5021da177e4SLinus Torvalds } 5031da177e4SLinus Torvalds 5041da177e4SLinus Torvalds /* 5051da177e4SLinus Torvalds * Still in SYN_RECV, just remove it silently. 5061da177e4SLinus Torvalds * There is no good way to pass the error to the newly 5071da177e4SLinus Torvalds * created socket, and POSIX does not want network 5081da177e4SLinus Torvalds * errors returned from accept(). 5091da177e4SLinus Torvalds */ 510463c84b9SArnaldo Carvalho de Melo inet_csk_reqsk_queue_drop(sk, req, prev); 5111da177e4SLinus Torvalds goto out; 5121da177e4SLinus Torvalds 5131da177e4SLinus Torvalds case TCP_SYN_SENT: 5141da177e4SLinus Torvalds case TCP_SYN_RECV: /* Cannot happen. 515168a8f58SJerry Chu It can f.e. if SYNs crossed, 516168a8f58SJerry Chu or Fast Open. 
5171da177e4SLinus Torvalds */ 5181da177e4SLinus Torvalds if (!sock_owned_by_user(sk)) { 5191da177e4SLinus Torvalds sk->sk_err = err; 5201da177e4SLinus Torvalds 5211da177e4SLinus Torvalds sk->sk_error_report(sk); 5221da177e4SLinus Torvalds 5231da177e4SLinus Torvalds tcp_done(sk); 5241da177e4SLinus Torvalds } else { 5251da177e4SLinus Torvalds sk->sk_err_soft = err; 5261da177e4SLinus Torvalds } 5271da177e4SLinus Torvalds goto out; 5281da177e4SLinus Torvalds } 5291da177e4SLinus Torvalds 5301da177e4SLinus Torvalds /* If we've already connected we will keep trying 5311da177e4SLinus Torvalds * until we time out, or the user gives up. 5321da177e4SLinus Torvalds * 5331da177e4SLinus Torvalds * rfc1122 4.2.3.9 allows to consider as hard errors 5341da177e4SLinus Torvalds * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too, 5351da177e4SLinus Torvalds * but it is obsoleted by pmtu discovery). 5361da177e4SLinus Torvalds * 5371da177e4SLinus Torvalds * Note, that in modern internet, where routing is unreliable 5381da177e4SLinus Torvalds * and in each dark corner broken firewalls sit, sending random 5391da177e4SLinus Torvalds * errors ordered by their masters even this two messages finally lose 5401da177e4SLinus Torvalds * their original sense (even Linux sends invalid PORT_UNREACHs) 5411da177e4SLinus Torvalds * 5421da177e4SLinus Torvalds * Now we are in compliance with RFCs. 
5431da177e4SLinus Torvalds * --ANK (980905) 5441da177e4SLinus Torvalds */ 5451da177e4SLinus Torvalds 5461da177e4SLinus Torvalds inet = inet_sk(sk); 5471da177e4SLinus Torvalds if (!sock_owned_by_user(sk) && inet->recverr) { 5481da177e4SLinus Torvalds sk->sk_err = err; 5491da177e4SLinus Torvalds sk->sk_error_report(sk); 5501da177e4SLinus Torvalds } else { /* Only an error on timeout */ 5511da177e4SLinus Torvalds sk->sk_err_soft = err; 5521da177e4SLinus Torvalds } 5531da177e4SLinus Torvalds 5541da177e4SLinus Torvalds out: 5551da177e4SLinus Torvalds bh_unlock_sock(sk); 5561da177e4SLinus Torvalds sock_put(sk); 5571da177e4SLinus Torvalds } 5581da177e4SLinus Torvalds 559419f9f89SHerbert Xu static void __tcp_v4_send_check(struct sk_buff *skb, 560419f9f89SHerbert Xu __be32 saddr, __be32 daddr) 5611da177e4SLinus Torvalds { 562aa8223c7SArnaldo Carvalho de Melo struct tcphdr *th = tcp_hdr(skb); 5631da177e4SLinus Torvalds 56484fa7933SPatrick McHardy if (skb->ip_summed == CHECKSUM_PARTIAL) { 565419f9f89SHerbert Xu th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0); 566663ead3bSHerbert Xu skb->csum_start = skb_transport_header(skb) - skb->head; 567ff1dcadbSAl Viro skb->csum_offset = offsetof(struct tcphdr, check); 5681da177e4SLinus Torvalds } else { 569419f9f89SHerbert Xu th->check = tcp_v4_check(skb->len, saddr, daddr, 57007f0757aSJoe Perches csum_partial(th, 5711da177e4SLinus Torvalds th->doff << 2, 5721da177e4SLinus Torvalds skb->csum)); 5731da177e4SLinus Torvalds } 5741da177e4SLinus Torvalds } 5751da177e4SLinus Torvalds 576419f9f89SHerbert Xu /* This routine computes an IPv4 TCP checksum. 
*/ 577bb296246SHerbert Xu void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb) 578419f9f89SHerbert Xu { 579cf533ea5SEric Dumazet const struct inet_sock *inet = inet_sk(sk); 580419f9f89SHerbert Xu 581419f9f89SHerbert Xu __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr); 582419f9f89SHerbert Xu } 5834bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_send_check); 584419f9f89SHerbert Xu 585a430a43dSHerbert Xu int tcp_v4_gso_send_check(struct sk_buff *skb) 586a430a43dSHerbert Xu { 587eddc9ec5SArnaldo Carvalho de Melo const struct iphdr *iph; 588a430a43dSHerbert Xu struct tcphdr *th; 589a430a43dSHerbert Xu 590a430a43dSHerbert Xu if (!pskb_may_pull(skb, sizeof(*th))) 591a430a43dSHerbert Xu return -EINVAL; 592a430a43dSHerbert Xu 593eddc9ec5SArnaldo Carvalho de Melo iph = ip_hdr(skb); 594aa8223c7SArnaldo Carvalho de Melo th = tcp_hdr(skb); 595a430a43dSHerbert Xu 596a430a43dSHerbert Xu th->check = 0; 59784fa7933SPatrick McHardy skb->ip_summed = CHECKSUM_PARTIAL; 598419f9f89SHerbert Xu __tcp_v4_send_check(skb, iph->saddr, iph->daddr); 599a430a43dSHerbert Xu return 0; 600a430a43dSHerbert Xu } 601a430a43dSHerbert Xu 6021da177e4SLinus Torvalds /* 6031da177e4SLinus Torvalds * This routine will send an RST to the other tcp. 6041da177e4SLinus Torvalds * 6051da177e4SLinus Torvalds * Someone asks: why I NEVER use socket parameters (TOS, TTL etc.) 6061da177e4SLinus Torvalds * for reset. 6071da177e4SLinus Torvalds * Answer: if a packet caused RST, it is not for a socket 6081da177e4SLinus Torvalds * existing in our system, if it is matched to a socket, 6091da177e4SLinus Torvalds * it is just duplicate segment or bug in other side's TCP. 6101da177e4SLinus Torvalds * So that we build reply only basing on parameters 6111da177e4SLinus Torvalds * arrived with segment. 6121da177e4SLinus Torvalds * Exception: precedence violation. We do not implement it in any case. 
 */

static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	/* Reply segment: bare TCP header, plus room for one MD5 option. */
	struct {
		struct tcphdr th;
#ifdef CONFIG_TCP_MD5SIG
		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
#endif
	} rep;
	struct ip_reply_arg arg;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
	const __u8 *hash_location = NULL;
	unsigned char newhash[16];
	int genhash;
	struct sock *sk1 = NULL;
#endif
	struct net *net;

	/* Never send a reset in response to a reset. */
	if (th->rst)
		return;

	/* Only answer segments whose route type is RTN_LOCAL. */
	if (skb_rtable(skb)->rt_type != RTN_LOCAL)
		return;

	/* Swap the send and the receive. */
	memset(&rep, 0, sizeof(rep));
	rep.th.dest = th->source;
	rep.th.source = th->dest;
	rep.th.doff = sizeof(struct tcphdr) / 4;
	rep.th.rst = 1;

	if (th->ack) {
		/* Peer told us what it expects next: use that as our seq. */
		rep.th.seq = th->ack_seq;
	} else {
		/*
		 * No ACK in the offending segment: acknowledge everything it
		 * occupied in sequence space (SYN and FIN count as one each,
		 * plus the payload length).
		 */
		rep.th.ack = 1;
		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
				       skb->len - (th->doff << 2));
	}

	memset(&arg, 0, sizeof(arg));
	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len = sizeof(rep.th);

#ifdef CONFIG_TCP_MD5SIG
	hash_location = tcp_parse_md5sig_option(th);
	if (!sk && hash_location) {
		/*
		 * The active side is lost. Try to find a listening socket
		 * through the source port, and then find the md5 key through
		 * that listening socket.
		 * We do not lose security here: the incoming packet is
		 * checked against the md5 hash computed with the key we
		 * found, and no RST is generated if the hash doesn't match.
		 */
		sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev),
					     &tcp_hashinfo, ip_hdr(skb)->daddr,
					     ntohs(th->source), inet_iif(skb));
		/* don't send rst if it can't find key */
		if (!sk1)
			return;
		/* Held until release_sk1; pairs with the unlock there. */
		rcu_read_lock();
		key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
					&ip_hdr(skb)->saddr, AF_INET);
		if (!key)
			goto release_sk1;

		genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, NULL, skb);
		if (genhash || memcmp(hash_location, newhash, 16) != 0)
			goto release_sk1;
	} else {
		key = sk ? tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
					     &ip_hdr(skb)->saddr,
					     AF_INET) : NULL;
	}

	if (key) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
				   (TCPOPT_NOP << 16) |
				   (TCPOPT_MD5SIG << 8) |
				   TCPOLEN_MD5SIG);
		/* Update length and the length the header thinks exists */
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len / 4;

		/* Addresses are swapped: the reply goes back to skb's source. */
		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	/* Offset of the check field, in 16-bit units. */
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
	/* When socket is gone, all binding information is lost.
	 * routing might fail in this case. No choice here, if we choose to force
	 * input interface, we will misroute in case of asymmetric route.
	 */
	if (sk)
		arg.bound_dev_if = sk->sk_bound_dev_if;

	net = dev_net(skb_dst(skb)->dev);
	arg.tos = ip_hdr(skb)->tos;
	ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr,
			      ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);

	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
	TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);

#ifdef CONFIG_TCP_MD5SIG
release_sk1:
	/* sk1 is only set on the listener-lookup path that took the RCU lock. */
	if (sk1) {
		rcu_read_unlock();
		sock_put(sk1);
	}
#endif
}

/* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
   outside socket context is ugly, certainly. What can I do?
7361da177e4SLinus Torvalds */ 7371da177e4SLinus Torvalds 7389501f972SYOSHIFUJI Hideaki static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, 7399501f972SYOSHIFUJI Hideaki u32 win, u32 ts, int oif, 74088ef4a5aSKOVACS Krisztian struct tcp_md5sig_key *key, 74166b13d99SEric Dumazet int reply_flags, u8 tos) 7421da177e4SLinus Torvalds { 743cf533ea5SEric Dumazet const struct tcphdr *th = tcp_hdr(skb); 7441da177e4SLinus Torvalds struct { 7451da177e4SLinus Torvalds struct tcphdr th; 746714e85beSAl Viro __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2) 747cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 748cfb6eeb4SYOSHIFUJI Hideaki + (TCPOLEN_MD5SIG_ALIGNED >> 2) 749cfb6eeb4SYOSHIFUJI Hideaki #endif 750cfb6eeb4SYOSHIFUJI Hideaki ]; 7511da177e4SLinus Torvalds } rep; 7521da177e4SLinus Torvalds struct ip_reply_arg arg; 753adf30907SEric Dumazet struct net *net = dev_net(skb_dst(skb)->dev); 7541da177e4SLinus Torvalds 7551da177e4SLinus Torvalds memset(&rep.th, 0, sizeof(struct tcphdr)); 7567174259eSArnaldo Carvalho de Melo memset(&arg, 0, sizeof(arg)); 7571da177e4SLinus Torvalds 7581da177e4SLinus Torvalds arg.iov[0].iov_base = (unsigned char *)&rep; 7591da177e4SLinus Torvalds arg.iov[0].iov_len = sizeof(rep.th); 7601da177e4SLinus Torvalds if (ts) { 761cfb6eeb4SYOSHIFUJI Hideaki rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | 7621da177e4SLinus Torvalds (TCPOPT_TIMESTAMP << 8) | 7631da177e4SLinus Torvalds TCPOLEN_TIMESTAMP); 764cfb6eeb4SYOSHIFUJI Hideaki rep.opt[1] = htonl(tcp_time_stamp); 765cfb6eeb4SYOSHIFUJI Hideaki rep.opt[2] = htonl(ts); 766cb48cfe8SCraig Schlenter arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED; 7671da177e4SLinus Torvalds } 7681da177e4SLinus Torvalds 7691da177e4SLinus Torvalds /* Swap the send and the receive. 
*/ 7701da177e4SLinus Torvalds rep.th.dest = th->source; 7711da177e4SLinus Torvalds rep.th.source = th->dest; 7721da177e4SLinus Torvalds rep.th.doff = arg.iov[0].iov_len / 4; 7731da177e4SLinus Torvalds rep.th.seq = htonl(seq); 7741da177e4SLinus Torvalds rep.th.ack_seq = htonl(ack); 7751da177e4SLinus Torvalds rep.th.ack = 1; 7761da177e4SLinus Torvalds rep.th.window = htons(win); 7771da177e4SLinus Torvalds 778cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 779cfb6eeb4SYOSHIFUJI Hideaki if (key) { 780cfb6eeb4SYOSHIFUJI Hideaki int offset = (ts) ? 3 : 0; 781cfb6eeb4SYOSHIFUJI Hideaki 782cfb6eeb4SYOSHIFUJI Hideaki rep.opt[offset++] = htonl((TCPOPT_NOP << 24) | 783cfb6eeb4SYOSHIFUJI Hideaki (TCPOPT_NOP << 16) | 784cfb6eeb4SYOSHIFUJI Hideaki (TCPOPT_MD5SIG << 8) | 785cfb6eeb4SYOSHIFUJI Hideaki TCPOLEN_MD5SIG); 786cfb6eeb4SYOSHIFUJI Hideaki arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED; 787cfb6eeb4SYOSHIFUJI Hideaki rep.th.doff = arg.iov[0].iov_len/4; 788cfb6eeb4SYOSHIFUJI Hideaki 78949a72dfbSAdam Langley tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset], 79090b7e112SAdam Langley key, ip_hdr(skb)->saddr, 79190b7e112SAdam Langley ip_hdr(skb)->daddr, &rep.th); 792cfb6eeb4SYOSHIFUJI Hideaki } 793cfb6eeb4SYOSHIFUJI Hideaki #endif 79488ef4a5aSKOVACS Krisztian arg.flags = reply_flags; 795eddc9ec5SArnaldo Carvalho de Melo arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr, 796eddc9ec5SArnaldo Carvalho de Melo ip_hdr(skb)->saddr, /* XXX */ 7971da177e4SLinus Torvalds arg.iov[0].iov_len, IPPROTO_TCP, 0); 7981da177e4SLinus Torvalds arg.csumoffset = offsetof(struct tcphdr, check) / 2; 7999501f972SYOSHIFUJI Hideaki if (oif) 8009501f972SYOSHIFUJI Hideaki arg.bound_dev_if = oif; 80166b13d99SEric Dumazet arg.tos = tos; 802be9f4a44SEric Dumazet ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr, 80370e73416SDavid S. 
Miller ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len); 8041da177e4SLinus Torvalds 80563231bddSPavel Emelyanov TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); 8061da177e4SLinus Torvalds } 8071da177e4SLinus Torvalds 8081da177e4SLinus Torvalds static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) 8091da177e4SLinus Torvalds { 8108feaf0c0SArnaldo Carvalho de Melo struct inet_timewait_sock *tw = inet_twsk(sk); 811cfb6eeb4SYOSHIFUJI Hideaki struct tcp_timewait_sock *tcptw = tcp_twsk(sk); 8121da177e4SLinus Torvalds 8139501f972SYOSHIFUJI Hideaki tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, 8147174259eSArnaldo Carvalho de Melo tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, 8159501f972SYOSHIFUJI Hideaki tcptw->tw_ts_recent, 8169501f972SYOSHIFUJI Hideaki tw->tw_bound_dev_if, 81788ef4a5aSKOVACS Krisztian tcp_twsk_md5_key(tcptw), 81866b13d99SEric Dumazet tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0, 81966b13d99SEric Dumazet tw->tw_tos 8209501f972SYOSHIFUJI Hideaki ); 8211da177e4SLinus Torvalds 8228feaf0c0SArnaldo Carvalho de Melo inet_twsk_put(tw); 8231da177e4SLinus Torvalds } 8241da177e4SLinus Torvalds 8256edafaafSGui Jianfeng static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, 8267174259eSArnaldo Carvalho de Melo struct request_sock *req) 8271da177e4SLinus Torvalds { 828168a8f58SJerry Chu /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV 829168a8f58SJerry Chu * sk->sk_state == TCP_SYN_RECV -> for Fast Open. 830168a8f58SJerry Chu */ 831168a8f58SJerry Chu tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ? 832168a8f58SJerry Chu tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, 833168a8f58SJerry Chu tcp_rsk(req)->rcv_nxt, req->rcv_wnd, 8349501f972SYOSHIFUJI Hideaki req->ts_recent, 8359501f972SYOSHIFUJI Hideaki 0, 836a915da9bSEric Dumazet tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr, 837a915da9bSEric Dumazet AF_INET), 83866b13d99SEric Dumazet inet_rsk(req)->no_srccheck ? 
IP_REPLY_ARG_NOSRCCHECK : 0, 83966b13d99SEric Dumazet ip_hdr(skb)->tos); 8401da177e4SLinus Torvalds } 8411da177e4SLinus Torvalds 8421da177e4SLinus Torvalds /* 8439bf1d83eSKris Katterjohn * Send a SYN-ACK after having received a SYN. 84460236fddSArnaldo Carvalho de Melo * This still operates on a request_sock only, not on a big 8451da177e4SLinus Torvalds * socket. 8461da177e4SLinus Torvalds */ 84772659eccSOctavian Purdila static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, 848e6b4d113SWilliam Allen Simpson struct request_sock *req, 849fff32699SEric Dumazet struct request_values *rvp, 8507586ecebSEric Dumazet u16 queue_mapping, 8517586ecebSEric Dumazet bool nocache) 8521da177e4SLinus Torvalds { 8532e6599cbSArnaldo Carvalho de Melo const struct inet_request_sock *ireq = inet_rsk(req); 8546bd023f3SDavid S. Miller struct flowi4 fl4; 8551da177e4SLinus Torvalds int err = -1; 8561da177e4SLinus Torvalds struct sk_buff * skb; 8571da177e4SLinus Torvalds 8581da177e4SLinus Torvalds /* First, grab a route. */ 859ba3f7f04SDavid S. Miller if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL) 860fd80eb94SDenis V. 
Lunev return -1; 8611da177e4SLinus Torvalds 8628336886fSJerry Chu skb = tcp_make_synack(sk, dst, req, rvp, NULL); 8631da177e4SLinus Torvalds 8641da177e4SLinus Torvalds if (skb) { 865419f9f89SHerbert Xu __tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr); 8661da177e4SLinus Torvalds 867fff32699SEric Dumazet skb_set_queue_mapping(skb, queue_mapping); 8682e6599cbSArnaldo Carvalho de Melo err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr, 8692e6599cbSArnaldo Carvalho de Melo ireq->rmt_addr, 8702e6599cbSArnaldo Carvalho de Melo ireq->opt); 871b9df3cb8SGerrit Renker err = net_xmit_eval(err); 872016818d0SNeal Cardwell if (!tcp_rsk(req)->snt_synack && !err) 873016818d0SNeal Cardwell tcp_rsk(req)->snt_synack = tcp_time_stamp; 8741da177e4SLinus Torvalds } 8751da177e4SLinus Torvalds 8761da177e4SLinus Torvalds return err; 8771da177e4SLinus Torvalds } 8781da177e4SLinus Torvalds 87972659eccSOctavian Purdila static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req, 880e6b4d113SWilliam Allen Simpson struct request_values *rvp) 881fd80eb94SDenis V. Lunev { 88272659eccSOctavian Purdila TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); 8837586ecebSEric Dumazet return tcp_v4_send_synack(sk, NULL, req, rvp, 0, false); 884fd80eb94SDenis V. Lunev } 885fd80eb94SDenis V. Lunev 8861da177e4SLinus Torvalds /* 88760236fddSArnaldo Carvalho de Melo * IPv4 request_sock destructor. 
 */
static void tcp_v4_reqsk_destructor(struct request_sock *req)
{
	/* Frees the IP options attached to the request (kfree(NULL) is a no-op). */
	kfree(inet_rsk(req)->opt);
}

/*
 * Return true if a syncookie should be sent
 *
 * Also bumps the matching MIB counter and logs one message per listener
 * (guarded by lopt->synflood_warned).  @proto is only used as the log prefix.
 */
bool tcp_syn_flood_action(struct sock *sk,
			 const struct sk_buff *skb,
			 const char *proto)
{
	const char *msg = "Dropping request";
	bool want_cookie = false;
	struct listen_sock *lopt;



#ifdef CONFIG_SYN_COOKIES
	if (sysctl_tcp_syncookies) {
		msg = "Sending cookies";
		want_cookie = true;
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
	} else
#endif
		/* Reached unconditionally when syncookies are compiled out. */
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);

	lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
	if (!lopt->synflood_warned) {
		lopt->synflood_warned = 1;
		pr_info("%s: Possible SYN flooding on port %d. %s.  Check SNMP counters.\n",
			proto, ntohs(tcp_hdr(skb)->dest), msg);
	}
	return want_cookie;
}
EXPORT_SYMBOL(tcp_syn_flood_action);

/*
 * Save and compile IPv4 options into the request_sock if needed.
9281da177e4SLinus Torvalds */ 9295dff747bSChristoph Paasch static struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb) 9301da177e4SLinus Torvalds { 931f6d8bd05SEric Dumazet const struct ip_options *opt = &(IPCB(skb)->opt); 932f6d8bd05SEric Dumazet struct ip_options_rcu *dopt = NULL; 9331da177e4SLinus Torvalds 9341da177e4SLinus Torvalds if (opt && opt->optlen) { 935f6d8bd05SEric Dumazet int opt_size = sizeof(*dopt) + opt->optlen; 936f6d8bd05SEric Dumazet 9371da177e4SLinus Torvalds dopt = kmalloc(opt_size, GFP_ATOMIC); 9381da177e4SLinus Torvalds if (dopt) { 939f6d8bd05SEric Dumazet if (ip_options_echo(&dopt->opt, skb)) { 9401da177e4SLinus Torvalds kfree(dopt); 9411da177e4SLinus Torvalds dopt = NULL; 9421da177e4SLinus Torvalds } 9431da177e4SLinus Torvalds } 9441da177e4SLinus Torvalds } 9451da177e4SLinus Torvalds return dopt; 9461da177e4SLinus Torvalds } 9471da177e4SLinus Torvalds 948cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 949cfb6eeb4SYOSHIFUJI Hideaki /* 950cfb6eeb4SYOSHIFUJI Hideaki * RFC2385 MD5 checksumming requires a mapping of 951cfb6eeb4SYOSHIFUJI Hideaki * IP address->MD5 Key. 952cfb6eeb4SYOSHIFUJI Hideaki * We need to maintain these in the sk structure. 953cfb6eeb4SYOSHIFUJI Hideaki */ 954cfb6eeb4SYOSHIFUJI Hideaki 955cfb6eeb4SYOSHIFUJI Hideaki /* Find the Key structure for an address. 
 *
 * Walks the socket's md5sig list and returns the first key whose family
 * equals @family and whose address matches @addr, or NULL when the socket
 * has no md5sig_info or no entry matches.
 */
struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
					 const union tcp_md5_addr *addr,
					 int family)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	struct hlist_node *pos;
	/* Number of address bytes compared; widened below for AF_INET6. */
	unsigned int size = sizeof(struct in_addr);
	struct tcp_md5sig_info *md5sig;

	/* caller either holds rcu_read_lock() or socket lock */
	md5sig = rcu_dereference_check(tp->md5sig_info,
				       sock_owned_by_user(sk) ||
				       lockdep_is_held(&sk->sk_lock.slock));
	if (!md5sig)
		return NULL;
#if IS_ENABLED(CONFIG_IPV6)
	if (family == AF_INET6)
		size = sizeof(struct in6_addr);
#endif
	hlist_for_each_entry_rcu(key, pos, &md5sig->head, node) {
		if (key->family != family)
			continue;
		if (!memcmp(&key->addr, addr, size))
			return key;
	}
	return NULL;
}
EXPORT_SYMBOL(tcp_md5_do_lookup);

/* Look up, on @sk, the IPv4 key for the destination address of @addr_sk. */
struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
					 struct sock *addr_sk)
{
	union tcp_md5_addr *addr;

	addr = (union tcp_md5_addr *)&inet_sk(addr_sk)->inet_daddr;
	return tcp_md5_do_lookup(sk, addr, AF_INET);
}
Hideaki EXPORT_SYMBOL(tcp_v4_md5_lookup); 995cfb6eeb4SYOSHIFUJI Hideaki 996f5b99bcdSAdrian Bunk static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk, 997cfb6eeb4SYOSHIFUJI Hideaki struct request_sock *req) 998cfb6eeb4SYOSHIFUJI Hideaki { 999a915da9bSEric Dumazet union tcp_md5_addr *addr; 1000a915da9bSEric Dumazet 1001a915da9bSEric Dumazet addr = (union tcp_md5_addr *)&inet_rsk(req)->rmt_addr; 1002a915da9bSEric Dumazet return tcp_md5_do_lookup(sk, addr, AF_INET); 1003cfb6eeb4SYOSHIFUJI Hideaki } 1004cfb6eeb4SYOSHIFUJI Hideaki 1005cfb6eeb4SYOSHIFUJI Hideaki /* This can be called on a newly created socket, from other files */ 1006a915da9bSEric Dumazet int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, 1007a915da9bSEric Dumazet int family, const u8 *newkey, u8 newkeylen, gfp_t gfp) 1008cfb6eeb4SYOSHIFUJI Hideaki { 1009cfb6eeb4SYOSHIFUJI Hideaki /* Add Key to the list */ 1010b0a713e9SMatthias M. Dellweg struct tcp_md5sig_key *key; 1011cfb6eeb4SYOSHIFUJI Hideaki struct tcp_sock *tp = tcp_sk(sk); 1012f6685938SArnaldo Carvalho de Melo struct tcp_md5sig_info *md5sig; 1013f6685938SArnaldo Carvalho de Melo 1014a915da9bSEric Dumazet key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&addr, AF_INET); 1015a915da9bSEric Dumazet if (key) { 1016a915da9bSEric Dumazet /* Pre-existing entry - just update that one. 
*/ 1017a915da9bSEric Dumazet memcpy(key->key, newkey, newkeylen); 1018a915da9bSEric Dumazet key->keylen = newkeylen; 1019a915da9bSEric Dumazet return 0; 1020cfb6eeb4SYOSHIFUJI Hideaki } 1021260fcbebSYan, Zheng 1022a8afca03SEric Dumazet md5sig = rcu_dereference_protected(tp->md5sig_info, 1023a8afca03SEric Dumazet sock_owned_by_user(sk)); 1024a915da9bSEric Dumazet if (!md5sig) { 1025a915da9bSEric Dumazet md5sig = kmalloc(sizeof(*md5sig), gfp); 1026a915da9bSEric Dumazet if (!md5sig) 1027a915da9bSEric Dumazet return -ENOMEM; 1028a915da9bSEric Dumazet 1029a915da9bSEric Dumazet sk_nocaps_add(sk, NETIF_F_GSO_MASK); 1030a915da9bSEric Dumazet INIT_HLIST_HEAD(&md5sig->head); 1031a8afca03SEric Dumazet rcu_assign_pointer(tp->md5sig_info, md5sig); 1032a915da9bSEric Dumazet } 1033a915da9bSEric Dumazet 10345f3d9cb2SEric Dumazet key = sock_kmalloc(sk, sizeof(*key), gfp); 1035a915da9bSEric Dumazet if (!key) 1036a915da9bSEric Dumazet return -ENOMEM; 1037a915da9bSEric Dumazet if (hlist_empty(&md5sig->head) && !tcp_alloc_md5sig_pool(sk)) { 10385f3d9cb2SEric Dumazet sock_kfree_s(sk, key, sizeof(*key)); 1039cfb6eeb4SYOSHIFUJI Hideaki return -ENOMEM; 1040cfb6eeb4SYOSHIFUJI Hideaki } 1041f6685938SArnaldo Carvalho de Melo 1042a915da9bSEric Dumazet memcpy(key->key, newkey, newkeylen); 1043a915da9bSEric Dumazet key->keylen = newkeylen; 1044a915da9bSEric Dumazet key->family = family; 1045a915da9bSEric Dumazet memcpy(&key->addr, addr, 1046a915da9bSEric Dumazet (family == AF_INET6) ? 
sizeof(struct in6_addr) : 1047a915da9bSEric Dumazet sizeof(struct in_addr)); 1048a915da9bSEric Dumazet hlist_add_head_rcu(&key->node, &md5sig->head); 1049cfb6eeb4SYOSHIFUJI Hideaki return 0; 1050cfb6eeb4SYOSHIFUJI Hideaki } 1051a915da9bSEric Dumazet EXPORT_SYMBOL(tcp_md5_do_add); 1052cfb6eeb4SYOSHIFUJI Hideaki 1053a915da9bSEric Dumazet int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family) 1054cfb6eeb4SYOSHIFUJI Hideaki { 1055cfb6eeb4SYOSHIFUJI Hideaki struct tcp_sock *tp = tcp_sk(sk); 1056a915da9bSEric Dumazet struct tcp_md5sig_key *key; 1057a8afca03SEric Dumazet struct tcp_md5sig_info *md5sig; 1058cfb6eeb4SYOSHIFUJI Hideaki 1059a915da9bSEric Dumazet key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&addr, AF_INET); 1060a915da9bSEric Dumazet if (!key) 1061cfb6eeb4SYOSHIFUJI Hideaki return -ENOENT; 1062a915da9bSEric Dumazet hlist_del_rcu(&key->node); 10635f3d9cb2SEric Dumazet atomic_sub(sizeof(*key), &sk->sk_omem_alloc); 1064a915da9bSEric Dumazet kfree_rcu(key, rcu); 1065a8afca03SEric Dumazet md5sig = rcu_dereference_protected(tp->md5sig_info, 1066a8afca03SEric Dumazet sock_owned_by_user(sk)); 1067a8afca03SEric Dumazet if (hlist_empty(&md5sig->head)) 1068a915da9bSEric Dumazet tcp_free_md5sig_pool(); 1069a915da9bSEric Dumazet return 0; 1070cfb6eeb4SYOSHIFUJI Hideaki } 1071a915da9bSEric Dumazet EXPORT_SYMBOL(tcp_md5_do_del); 1072cfb6eeb4SYOSHIFUJI Hideaki 1073*e0683e70Sstephen hemminger static void tcp_clear_md5_list(struct sock *sk) 1074cfb6eeb4SYOSHIFUJI Hideaki { 1075cfb6eeb4SYOSHIFUJI Hideaki struct tcp_sock *tp = tcp_sk(sk); 1076a915da9bSEric Dumazet struct tcp_md5sig_key *key; 1077a915da9bSEric Dumazet struct hlist_node *pos, *n; 1078a8afca03SEric Dumazet struct tcp_md5sig_info *md5sig; 1079cfb6eeb4SYOSHIFUJI Hideaki 1080a8afca03SEric Dumazet md5sig = rcu_dereference_protected(tp->md5sig_info, 1); 1081a8afca03SEric Dumazet 1082a8afca03SEric Dumazet if (!hlist_empty(&md5sig->head)) 1083cfb6eeb4SYOSHIFUJI Hideaki tcp_free_md5sig_pool(); 
1084a8afca03SEric Dumazet hlist_for_each_entry_safe(key, pos, n, &md5sig->head, node) { 1085a915da9bSEric Dumazet hlist_del_rcu(&key->node); 10865f3d9cb2SEric Dumazet atomic_sub(sizeof(*key), &sk->sk_omem_alloc); 1087a915da9bSEric Dumazet kfree_rcu(key, rcu); 1088cfb6eeb4SYOSHIFUJI Hideaki } 1089cfb6eeb4SYOSHIFUJI Hideaki } 1090cfb6eeb4SYOSHIFUJI Hideaki 1091cfb6eeb4SYOSHIFUJI Hideaki static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval, 1092cfb6eeb4SYOSHIFUJI Hideaki int optlen) 1093cfb6eeb4SYOSHIFUJI Hideaki { 1094cfb6eeb4SYOSHIFUJI Hideaki struct tcp_md5sig cmd; 1095cfb6eeb4SYOSHIFUJI Hideaki struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr; 1096cfb6eeb4SYOSHIFUJI Hideaki 1097cfb6eeb4SYOSHIFUJI Hideaki if (optlen < sizeof(cmd)) 1098cfb6eeb4SYOSHIFUJI Hideaki return -EINVAL; 1099cfb6eeb4SYOSHIFUJI Hideaki 1100cfb6eeb4SYOSHIFUJI Hideaki if (copy_from_user(&cmd, optval, sizeof(cmd))) 1101cfb6eeb4SYOSHIFUJI Hideaki return -EFAULT; 1102cfb6eeb4SYOSHIFUJI Hideaki 1103cfb6eeb4SYOSHIFUJI Hideaki if (sin->sin_family != AF_INET) 1104cfb6eeb4SYOSHIFUJI Hideaki return -EINVAL; 1105cfb6eeb4SYOSHIFUJI Hideaki 1106a8afca03SEric Dumazet if (!cmd.tcpm_key || !cmd.tcpm_keylen) 1107a915da9bSEric Dumazet return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr, 1108a915da9bSEric Dumazet AF_INET); 1109cfb6eeb4SYOSHIFUJI Hideaki 1110cfb6eeb4SYOSHIFUJI Hideaki if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN) 1111cfb6eeb4SYOSHIFUJI Hideaki return -EINVAL; 1112cfb6eeb4SYOSHIFUJI Hideaki 1113a915da9bSEric Dumazet return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr, 1114a915da9bSEric Dumazet AF_INET, cmd.tcpm_key, cmd.tcpm_keylen, 1115a915da9bSEric Dumazet GFP_KERNEL); 1116cfb6eeb4SYOSHIFUJI Hideaki } 1117cfb6eeb4SYOSHIFUJI Hideaki 111849a72dfbSAdam Langley static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp, 111949a72dfbSAdam Langley __be32 daddr, __be32 saddr, int nbytes) 1120cfb6eeb4SYOSHIFUJI Hideaki { 
1121cfb6eeb4SYOSHIFUJI Hideaki struct tcp4_pseudohdr *bp; 112249a72dfbSAdam Langley struct scatterlist sg; 1123cfb6eeb4SYOSHIFUJI Hideaki 1124cfb6eeb4SYOSHIFUJI Hideaki bp = &hp->md5_blk.ip4; 1125cfb6eeb4SYOSHIFUJI Hideaki 1126cfb6eeb4SYOSHIFUJI Hideaki /* 112749a72dfbSAdam Langley * 1. the TCP pseudo-header (in the order: source IP address, 1128cfb6eeb4SYOSHIFUJI Hideaki * destination IP address, zero-padded protocol number, and 1129cfb6eeb4SYOSHIFUJI Hideaki * segment length) 1130cfb6eeb4SYOSHIFUJI Hideaki */ 1131cfb6eeb4SYOSHIFUJI Hideaki bp->saddr = saddr; 1132cfb6eeb4SYOSHIFUJI Hideaki bp->daddr = daddr; 1133cfb6eeb4SYOSHIFUJI Hideaki bp->pad = 0; 1134076fb722SYOSHIFUJI Hideaki bp->protocol = IPPROTO_TCP; 113549a72dfbSAdam Langley bp->len = cpu_to_be16(nbytes); 1136c7da57a1SDavid S. Miller 113749a72dfbSAdam Langley sg_init_one(&sg, bp, sizeof(*bp)); 113849a72dfbSAdam Langley return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp)); 113949a72dfbSAdam Langley } 114049a72dfbSAdam Langley 1141a915da9bSEric Dumazet static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, 1142318cf7aaSEric Dumazet __be32 daddr, __be32 saddr, const struct tcphdr *th) 114349a72dfbSAdam Langley { 114449a72dfbSAdam Langley struct tcp_md5sig_pool *hp; 114549a72dfbSAdam Langley struct hash_desc *desc; 114649a72dfbSAdam Langley 114749a72dfbSAdam Langley hp = tcp_get_md5sig_pool(); 114849a72dfbSAdam Langley if (!hp) 114949a72dfbSAdam Langley goto clear_hash_noput; 115049a72dfbSAdam Langley desc = &hp->md5_desc; 115149a72dfbSAdam Langley 115249a72dfbSAdam Langley if (crypto_hash_init(desc)) 115349a72dfbSAdam Langley goto clear_hash; 115449a72dfbSAdam Langley if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2)) 115549a72dfbSAdam Langley goto clear_hash; 115649a72dfbSAdam Langley if (tcp_md5_hash_header(hp, th)) 115749a72dfbSAdam Langley goto clear_hash; 115849a72dfbSAdam Langley if (tcp_md5_hash_key(hp, key)) 115949a72dfbSAdam Langley goto clear_hash; 
116049a72dfbSAdam Langley if (crypto_hash_final(desc, md5_hash)) 1161cfb6eeb4SYOSHIFUJI Hideaki goto clear_hash; 1162cfb6eeb4SYOSHIFUJI Hideaki 1163cfb6eeb4SYOSHIFUJI Hideaki tcp_put_md5sig_pool(); 1164cfb6eeb4SYOSHIFUJI Hideaki return 0; 116549a72dfbSAdam Langley 1166cfb6eeb4SYOSHIFUJI Hideaki clear_hash: 1167cfb6eeb4SYOSHIFUJI Hideaki tcp_put_md5sig_pool(); 1168cfb6eeb4SYOSHIFUJI Hideaki clear_hash_noput: 1169cfb6eeb4SYOSHIFUJI Hideaki memset(md5_hash, 0, 16); 117049a72dfbSAdam Langley return 1; 1171cfb6eeb4SYOSHIFUJI Hideaki } 1172cfb6eeb4SYOSHIFUJI Hideaki 117349a72dfbSAdam Langley int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key, 1174318cf7aaSEric Dumazet const struct sock *sk, const struct request_sock *req, 1175318cf7aaSEric Dumazet const struct sk_buff *skb) 1176cfb6eeb4SYOSHIFUJI Hideaki { 117749a72dfbSAdam Langley struct tcp_md5sig_pool *hp; 117849a72dfbSAdam Langley struct hash_desc *desc; 1179318cf7aaSEric Dumazet const struct tcphdr *th = tcp_hdr(skb); 1180cfb6eeb4SYOSHIFUJI Hideaki __be32 saddr, daddr; 1181cfb6eeb4SYOSHIFUJI Hideaki 1182cfb6eeb4SYOSHIFUJI Hideaki if (sk) { 1183c720c7e8SEric Dumazet saddr = inet_sk(sk)->inet_saddr; 1184c720c7e8SEric Dumazet daddr = inet_sk(sk)->inet_daddr; 118549a72dfbSAdam Langley } else if (req) { 118649a72dfbSAdam Langley saddr = inet_rsk(req)->loc_addr; 118749a72dfbSAdam Langley daddr = inet_rsk(req)->rmt_addr; 1188cfb6eeb4SYOSHIFUJI Hideaki } else { 118949a72dfbSAdam Langley const struct iphdr *iph = ip_hdr(skb); 119049a72dfbSAdam Langley saddr = iph->saddr; 119149a72dfbSAdam Langley daddr = iph->daddr; 1192cfb6eeb4SYOSHIFUJI Hideaki } 1193cfb6eeb4SYOSHIFUJI Hideaki 119449a72dfbSAdam Langley hp = tcp_get_md5sig_pool(); 119549a72dfbSAdam Langley if (!hp) 119649a72dfbSAdam Langley goto clear_hash_noput; 119749a72dfbSAdam Langley desc = &hp->md5_desc; 119849a72dfbSAdam Langley 119949a72dfbSAdam Langley if (crypto_hash_init(desc)) 120049a72dfbSAdam Langley goto clear_hash; 120149a72dfbSAdam Langley 
120249a72dfbSAdam Langley if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len)) 120349a72dfbSAdam Langley goto clear_hash; 120449a72dfbSAdam Langley if (tcp_md5_hash_header(hp, th)) 120549a72dfbSAdam Langley goto clear_hash; 120649a72dfbSAdam Langley if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2)) 120749a72dfbSAdam Langley goto clear_hash; 120849a72dfbSAdam Langley if (tcp_md5_hash_key(hp, key)) 120949a72dfbSAdam Langley goto clear_hash; 121049a72dfbSAdam Langley if (crypto_hash_final(desc, md5_hash)) 121149a72dfbSAdam Langley goto clear_hash; 121249a72dfbSAdam Langley 121349a72dfbSAdam Langley tcp_put_md5sig_pool(); 121449a72dfbSAdam Langley return 0; 121549a72dfbSAdam Langley 121649a72dfbSAdam Langley clear_hash: 121749a72dfbSAdam Langley tcp_put_md5sig_pool(); 121849a72dfbSAdam Langley clear_hash_noput: 121949a72dfbSAdam Langley memset(md5_hash, 0, 16); 122049a72dfbSAdam Langley return 1; 122149a72dfbSAdam Langley } 122249a72dfbSAdam Langley EXPORT_SYMBOL(tcp_v4_md5_hash_skb); 1223cfb6eeb4SYOSHIFUJI Hideaki 1224a2a385d6SEric Dumazet static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) 1225cfb6eeb4SYOSHIFUJI Hideaki { 1226cfb6eeb4SYOSHIFUJI Hideaki /* 1227cfb6eeb4SYOSHIFUJI Hideaki * This gets called for each TCP segment that arrives 1228cfb6eeb4SYOSHIFUJI Hideaki * so we want to be efficient. 1229cfb6eeb4SYOSHIFUJI Hideaki * We have 3 drop cases: 1230cfb6eeb4SYOSHIFUJI Hideaki * o No MD5 hash and one expected. 1231cfb6eeb4SYOSHIFUJI Hideaki * o MD5 hash and we're not expecting one. 1232cfb6eeb4SYOSHIFUJI Hideaki * o MD5 hash and its wrong. 
1233cfb6eeb4SYOSHIFUJI Hideaki */ 1234cf533ea5SEric Dumazet const __u8 *hash_location = NULL; 1235cfb6eeb4SYOSHIFUJI Hideaki struct tcp_md5sig_key *hash_expected; 1236eddc9ec5SArnaldo Carvalho de Melo const struct iphdr *iph = ip_hdr(skb); 1237cf533ea5SEric Dumazet const struct tcphdr *th = tcp_hdr(skb); 1238cfb6eeb4SYOSHIFUJI Hideaki int genhash; 1239cfb6eeb4SYOSHIFUJI Hideaki unsigned char newhash[16]; 1240cfb6eeb4SYOSHIFUJI Hideaki 1241a915da9bSEric Dumazet hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr, 1242a915da9bSEric Dumazet AF_INET); 12437d5d5525SYOSHIFUJI Hideaki hash_location = tcp_parse_md5sig_option(th); 1244cfb6eeb4SYOSHIFUJI Hideaki 1245cfb6eeb4SYOSHIFUJI Hideaki /* We've parsed the options - do we have a hash? */ 1246cfb6eeb4SYOSHIFUJI Hideaki if (!hash_expected && !hash_location) 1247a2a385d6SEric Dumazet return false; 1248cfb6eeb4SYOSHIFUJI Hideaki 1249cfb6eeb4SYOSHIFUJI Hideaki if (hash_expected && !hash_location) { 1250785957d3SDavid S. Miller NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); 1251a2a385d6SEric Dumazet return true; 1252cfb6eeb4SYOSHIFUJI Hideaki } 1253cfb6eeb4SYOSHIFUJI Hideaki 1254cfb6eeb4SYOSHIFUJI Hideaki if (!hash_expected && hash_location) { 1255785957d3SDavid S. Miller NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED); 1256a2a385d6SEric Dumazet return true; 1257cfb6eeb4SYOSHIFUJI Hideaki } 1258cfb6eeb4SYOSHIFUJI Hideaki 1259cfb6eeb4SYOSHIFUJI Hideaki /* Okay, so this is hash_expected and hash_location - 1260cfb6eeb4SYOSHIFUJI Hideaki * so we need to calculate the checksum. 
1261cfb6eeb4SYOSHIFUJI Hideaki */ 126249a72dfbSAdam Langley genhash = tcp_v4_md5_hash_skb(newhash, 1263cfb6eeb4SYOSHIFUJI Hideaki hash_expected, 126449a72dfbSAdam Langley NULL, NULL, skb); 1265cfb6eeb4SYOSHIFUJI Hideaki 1266cfb6eeb4SYOSHIFUJI Hideaki if (genhash || memcmp(hash_location, newhash, 16) != 0) { 1267e87cc472SJoe Perches net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n", 1268673d57e7SHarvey Harrison &iph->saddr, ntohs(th->source), 1269673d57e7SHarvey Harrison &iph->daddr, ntohs(th->dest), 1270e87cc472SJoe Perches genhash ? " tcp_v4_calc_md5_hash failed" 1271e87cc472SJoe Perches : ""); 1272a2a385d6SEric Dumazet return true; 1273cfb6eeb4SYOSHIFUJI Hideaki } 1274a2a385d6SEric Dumazet return false; 1275cfb6eeb4SYOSHIFUJI Hideaki } 1276cfb6eeb4SYOSHIFUJI Hideaki 1277cfb6eeb4SYOSHIFUJI Hideaki #endif 1278cfb6eeb4SYOSHIFUJI Hideaki 127972a3effaSEric Dumazet struct request_sock_ops tcp_request_sock_ops __read_mostly = { 12801da177e4SLinus Torvalds .family = PF_INET, 12812e6599cbSArnaldo Carvalho de Melo .obj_size = sizeof(struct tcp_request_sock), 128272659eccSOctavian Purdila .rtx_syn_ack = tcp_v4_rtx_synack, 128360236fddSArnaldo Carvalho de Melo .send_ack = tcp_v4_reqsk_send_ack, 128460236fddSArnaldo Carvalho de Melo .destructor = tcp_v4_reqsk_destructor, 12851da177e4SLinus Torvalds .send_reset = tcp_v4_send_reset, 128672659eccSOctavian Purdila .syn_ack_timeout = tcp_syn_ack_timeout, 12871da177e4SLinus Torvalds }; 12881da177e4SLinus Torvalds 1289cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 1290b2e4b3deSStephen Hemminger static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { 1291cfb6eeb4SYOSHIFUJI Hideaki .md5_lookup = tcp_v4_reqsk_md5_lookup, 1292e3afe7b7SJohn Dykstra .calc_md5_hash = tcp_v4_md5_hash_skb, 1293cfb6eeb4SYOSHIFUJI Hideaki }; 1294b6332e6cSAndrew Morton #endif 1295cfb6eeb4SYOSHIFUJI Hideaki 1296168a8f58SJerry Chu static bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb, 1297168a8f58SJerry Chu 
struct request_sock *req, 1298168a8f58SJerry Chu struct tcp_fastopen_cookie *foc, 1299168a8f58SJerry Chu struct tcp_fastopen_cookie *valid_foc) 1300168a8f58SJerry Chu { 1301168a8f58SJerry Chu bool skip_cookie = false; 1302168a8f58SJerry Chu struct fastopen_queue *fastopenq; 1303168a8f58SJerry Chu 1304168a8f58SJerry Chu if (likely(!fastopen_cookie_present(foc))) { 1305168a8f58SJerry Chu /* See include/net/tcp.h for the meaning of these knobs */ 1306168a8f58SJerry Chu if ((sysctl_tcp_fastopen & TFO_SERVER_ALWAYS) || 1307168a8f58SJerry Chu ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD) && 1308168a8f58SJerry Chu (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1))) 1309168a8f58SJerry Chu skip_cookie = true; /* no cookie to validate */ 1310168a8f58SJerry Chu else 1311168a8f58SJerry Chu return false; 1312168a8f58SJerry Chu } 1313168a8f58SJerry Chu fastopenq = inet_csk(sk)->icsk_accept_queue.fastopenq; 1314168a8f58SJerry Chu /* A FO option is present; bump the counter. */ 1315168a8f58SJerry Chu NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVE); 1316168a8f58SJerry Chu 1317168a8f58SJerry Chu /* Make sure the listener has enabled fastopen, and we don't 1318168a8f58SJerry Chu * exceed the max # of pending TFO requests allowed before trying 1319168a8f58SJerry Chu * to validating the cookie in order to avoid burning CPU cycles 1320168a8f58SJerry Chu * unnecessarily. 1321168a8f58SJerry Chu * 1322168a8f58SJerry Chu * XXX (TFO) - The implication of checking the max_qlen before 1323168a8f58SJerry Chu * processing a cookie request is that clients can't differentiate 1324168a8f58SJerry Chu * between qlen overflow causing Fast Open to be disabled 1325168a8f58SJerry Chu * temporarily vs a server not supporting Fast Open at all. 
1326168a8f58SJerry Chu */ 1327168a8f58SJerry Chu if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) == 0 || 1328168a8f58SJerry Chu fastopenq == NULL || fastopenq->max_qlen == 0) 1329168a8f58SJerry Chu return false; 1330168a8f58SJerry Chu 1331168a8f58SJerry Chu if (fastopenq->qlen >= fastopenq->max_qlen) { 1332168a8f58SJerry Chu struct request_sock *req1; 1333168a8f58SJerry Chu spin_lock(&fastopenq->lock); 1334168a8f58SJerry Chu req1 = fastopenq->rskq_rst_head; 1335168a8f58SJerry Chu if ((req1 == NULL) || time_after(req1->expires, jiffies)) { 1336168a8f58SJerry Chu spin_unlock(&fastopenq->lock); 1337168a8f58SJerry Chu NET_INC_STATS_BH(sock_net(sk), 1338168a8f58SJerry Chu LINUX_MIB_TCPFASTOPENLISTENOVERFLOW); 1339168a8f58SJerry Chu /* Avoid bumping LINUX_MIB_TCPFASTOPENPASSIVEFAIL*/ 1340168a8f58SJerry Chu foc->len = -1; 1341168a8f58SJerry Chu return false; 1342168a8f58SJerry Chu } 1343168a8f58SJerry Chu fastopenq->rskq_rst_head = req1->dl_next; 1344168a8f58SJerry Chu fastopenq->qlen--; 1345168a8f58SJerry Chu spin_unlock(&fastopenq->lock); 1346168a8f58SJerry Chu reqsk_free(req1); 1347168a8f58SJerry Chu } 1348168a8f58SJerry Chu if (skip_cookie) { 1349168a8f58SJerry Chu tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq; 1350168a8f58SJerry Chu return true; 1351168a8f58SJerry Chu } 1352168a8f58SJerry Chu if (foc->len == TCP_FASTOPEN_COOKIE_SIZE) { 1353168a8f58SJerry Chu if ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_CHKED) == 0) { 1354168a8f58SJerry Chu tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc); 1355168a8f58SJerry Chu if ((valid_foc->len != TCP_FASTOPEN_COOKIE_SIZE) || 1356168a8f58SJerry Chu memcmp(&foc->val[0], &valid_foc->val[0], 1357168a8f58SJerry Chu TCP_FASTOPEN_COOKIE_SIZE) != 0) 1358168a8f58SJerry Chu return false; 1359168a8f58SJerry Chu valid_foc->len = -1; 1360168a8f58SJerry Chu } 1361168a8f58SJerry Chu /* Acknowledge the data received from the peer. 
*/ 1362168a8f58SJerry Chu tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq; 1363168a8f58SJerry Chu return true; 1364168a8f58SJerry Chu } else if (foc->len == 0) { /* Client requesting a cookie */ 1365168a8f58SJerry Chu tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc); 1366168a8f58SJerry Chu NET_INC_STATS_BH(sock_net(sk), 1367168a8f58SJerry Chu LINUX_MIB_TCPFASTOPENCOOKIEREQD); 1368168a8f58SJerry Chu } else { 1369168a8f58SJerry Chu /* Client sent a cookie with wrong size. Treat it 1370168a8f58SJerry Chu * the same as invalid and return a valid one. 1371168a8f58SJerry Chu */ 1372168a8f58SJerry Chu tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc); 1373168a8f58SJerry Chu } 1374168a8f58SJerry Chu return false; 1375168a8f58SJerry Chu } 1376168a8f58SJerry Chu 1377168a8f58SJerry Chu static int tcp_v4_conn_req_fastopen(struct sock *sk, 1378168a8f58SJerry Chu struct sk_buff *skb, 1379168a8f58SJerry Chu struct sk_buff *skb_synack, 1380168a8f58SJerry Chu struct request_sock *req, 1381168a8f58SJerry Chu struct request_values *rvp) 1382168a8f58SJerry Chu { 1383168a8f58SJerry Chu struct tcp_sock *tp = tcp_sk(sk); 1384168a8f58SJerry Chu struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; 1385168a8f58SJerry Chu const struct inet_request_sock *ireq = inet_rsk(req); 1386168a8f58SJerry Chu struct sock *child; 1387016818d0SNeal Cardwell int err; 1388168a8f58SJerry Chu 1389168a8f58SJerry Chu req->retrans = 0; 1390168a8f58SJerry Chu req->sk = NULL; 1391168a8f58SJerry Chu 1392168a8f58SJerry Chu child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL); 1393168a8f58SJerry Chu if (child == NULL) { 1394168a8f58SJerry Chu NET_INC_STATS_BH(sock_net(sk), 1395168a8f58SJerry Chu LINUX_MIB_TCPFASTOPENPASSIVEFAIL); 1396168a8f58SJerry Chu kfree_skb(skb_synack); 1397168a8f58SJerry Chu return -1; 1398168a8f58SJerry Chu } 1399016818d0SNeal Cardwell err = ip_build_and_send_pkt(skb_synack, sk, ireq->loc_addr, 1400168a8f58SJerry Chu ireq->rmt_addr, ireq->opt); 
1401016818d0SNeal Cardwell err = net_xmit_eval(err); 1402016818d0SNeal Cardwell if (!err) 1403016818d0SNeal Cardwell tcp_rsk(req)->snt_synack = tcp_time_stamp; 1404168a8f58SJerry Chu /* XXX (TFO) - is it ok to ignore error and continue? */ 1405168a8f58SJerry Chu 1406168a8f58SJerry Chu spin_lock(&queue->fastopenq->lock); 1407168a8f58SJerry Chu queue->fastopenq->qlen++; 1408168a8f58SJerry Chu spin_unlock(&queue->fastopenq->lock); 1409168a8f58SJerry Chu 1410168a8f58SJerry Chu /* Initialize the child socket. Have to fix some values to take 1411168a8f58SJerry Chu * into account the child is a Fast Open socket and is created 1412168a8f58SJerry Chu * only out of the bits carried in the SYN packet. 1413168a8f58SJerry Chu */ 1414168a8f58SJerry Chu tp = tcp_sk(child); 1415168a8f58SJerry Chu 1416168a8f58SJerry Chu tp->fastopen_rsk = req; 1417168a8f58SJerry Chu /* Do a hold on the listner sk so that if the listener is being 1418168a8f58SJerry Chu * closed, the child that has been accepted can live on and still 1419168a8f58SJerry Chu * access listen_lock. 1420168a8f58SJerry Chu */ 1421168a8f58SJerry Chu sock_hold(sk); 1422168a8f58SJerry Chu tcp_rsk(req)->listener = sk; 1423168a8f58SJerry Chu 1424168a8f58SJerry Chu /* RFC1323: The window in SYN & SYN/ACK segments is never 1425168a8f58SJerry Chu * scaled. So correct it appropriately. 1426168a8f58SJerry Chu */ 1427168a8f58SJerry Chu tp->snd_wnd = ntohs(tcp_hdr(skb)->window); 1428168a8f58SJerry Chu 1429168a8f58SJerry Chu /* Activate the retrans timer so that SYNACK can be retransmitted. 1430168a8f58SJerry Chu * The request socket is not added to the SYN table of the parent 1431168a8f58SJerry Chu * because it's been added to the accept queue directly. 
1432168a8f58SJerry Chu */ 1433168a8f58SJerry Chu inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS, 1434168a8f58SJerry Chu TCP_TIMEOUT_INIT, TCP_RTO_MAX); 1435168a8f58SJerry Chu 1436168a8f58SJerry Chu /* Add the child socket directly into the accept queue */ 1437168a8f58SJerry Chu inet_csk_reqsk_queue_add(sk, req, child); 1438168a8f58SJerry Chu 1439168a8f58SJerry Chu /* Now finish processing the fastopen child socket. */ 1440168a8f58SJerry Chu inet_csk(child)->icsk_af_ops->rebuild_header(child); 1441168a8f58SJerry Chu tcp_init_congestion_control(child); 1442168a8f58SJerry Chu tcp_mtup_init(child); 1443168a8f58SJerry Chu tcp_init_buffer_space(child); 1444168a8f58SJerry Chu tcp_init_metrics(child); 1445168a8f58SJerry Chu 1446168a8f58SJerry Chu /* Queue the data carried in the SYN packet. We need to first 1447168a8f58SJerry Chu * bump skb's refcnt because the caller will attempt to free it. 1448168a8f58SJerry Chu * 1449168a8f58SJerry Chu * XXX (TFO) - we honor a zero-payload TFO request for now. 1450168a8f58SJerry Chu * (Any reason not to?) 1451168a8f58SJerry Chu */ 1452168a8f58SJerry Chu if (TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq + 1) { 1453168a8f58SJerry Chu /* Don't queue the skb if there is no payload in SYN. 1454168a8f58SJerry Chu * XXX (TFO) - How about SYN+FIN? 
1455168a8f58SJerry Chu */ 1456168a8f58SJerry Chu tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; 1457168a8f58SJerry Chu } else { 1458168a8f58SJerry Chu skb = skb_get(skb); 1459168a8f58SJerry Chu skb_dst_drop(skb); 1460168a8f58SJerry Chu __skb_pull(skb, tcp_hdr(skb)->doff * 4); 1461168a8f58SJerry Chu skb_set_owner_r(skb, child); 1462168a8f58SJerry Chu __skb_queue_tail(&child->sk_receive_queue, skb); 1463168a8f58SJerry Chu tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; 1464168a8f58SJerry Chu } 1465168a8f58SJerry Chu sk->sk_data_ready(sk, 0); 1466168a8f58SJerry Chu bh_unlock_sock(child); 1467168a8f58SJerry Chu sock_put(child); 1468168a8f58SJerry Chu WARN_ON(req->sk == NULL); 1469168a8f58SJerry Chu return 0; 1470168a8f58SJerry Chu } 1471168a8f58SJerry Chu 14721da177e4SLinus Torvalds int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) 14731da177e4SLinus Torvalds { 14744957faadSWilliam Allen Simpson struct tcp_extend_values tmp_ext; 14751da177e4SLinus Torvalds struct tcp_options_received tmp_opt; 1476cf533ea5SEric Dumazet const u8 *hash_location; 147760236fddSArnaldo Carvalho de Melo struct request_sock *req; 1478e6b4d113SWilliam Allen Simpson struct inet_request_sock *ireq; 14794957faadSWilliam Allen Simpson struct tcp_sock *tp = tcp_sk(sk); 1480e6b4d113SWilliam Allen Simpson struct dst_entry *dst = NULL; 1481eddc9ec5SArnaldo Carvalho de Melo __be32 saddr = ip_hdr(skb)->saddr; 1482eddc9ec5SArnaldo Carvalho de Melo __be32 daddr = ip_hdr(skb)->daddr; 14831da177e4SLinus Torvalds __u32 isn = TCP_SKB_CB(skb)->when; 1484a2a385d6SEric Dumazet bool want_cookie = false; 1485168a8f58SJerry Chu struct flowi4 fl4; 1486168a8f58SJerry Chu struct tcp_fastopen_cookie foc = { .len = -1 }; 1487168a8f58SJerry Chu struct tcp_fastopen_cookie valid_foc = { .len = -1 }; 1488168a8f58SJerry Chu struct sk_buff *skb_synack; 1489168a8f58SJerry Chu int do_fastopen; 14901da177e4SLinus Torvalds 14911da177e4SLinus Torvalds /* Never answer to SYNs send to broadcast or multicast */ 1492511c3f92SEric Dumazet 
if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) 14931da177e4SLinus Torvalds goto drop; 14941da177e4SLinus Torvalds 14951da177e4SLinus Torvalds /* TW buckets are converted to open requests without 14961da177e4SLinus Torvalds * limitations, they conserve resources and peer is 14971da177e4SLinus Torvalds * evidently real one. 14981da177e4SLinus Torvalds */ 1499463c84b9SArnaldo Carvalho de Melo if (inet_csk_reqsk_queue_is_full(sk) && !isn) { 1500946cedccSEric Dumazet want_cookie = tcp_syn_flood_action(sk, skb, "TCP"); 1501946cedccSEric Dumazet if (!want_cookie) 15021da177e4SLinus Torvalds goto drop; 15031da177e4SLinus Torvalds } 15041da177e4SLinus Torvalds 15051da177e4SLinus Torvalds /* Accept backlog is full. If we have already queued enough 15061da177e4SLinus Torvalds * of warm entries in syn queue, drop request. It is better than 15071da177e4SLinus Torvalds * clogging syn queue with openreqs with exponentially increasing 15081da177e4SLinus Torvalds * timeout. 15091da177e4SLinus Torvalds */ 1510463c84b9SArnaldo Carvalho de Melo if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) 15111da177e4SLinus Torvalds goto drop; 15121da177e4SLinus Torvalds 1513ce4a7d0dSArnaldo Carvalho de Melo req = inet_reqsk_alloc(&tcp_request_sock_ops); 15141da177e4SLinus Torvalds if (!req) 15151da177e4SLinus Torvalds goto drop; 15161da177e4SLinus Torvalds 1517cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 1518cfb6eeb4SYOSHIFUJI Hideaki tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops; 1519cfb6eeb4SYOSHIFUJI Hideaki #endif 1520cfb6eeb4SYOSHIFUJI Hideaki 15211da177e4SLinus Torvalds tcp_clear_options(&tmp_opt); 1522bee7ca9eSWilliam Allen Simpson tmp_opt.mss_clamp = TCP_MSS_DEFAULT; 15234957faadSWilliam Allen Simpson tmp_opt.user_mss = tp->rx_opt.user_mss; 1524168a8f58SJerry Chu tcp_parse_options(skb, &tmp_opt, &hash_location, 0, 1525168a8f58SJerry Chu want_cookie ? 
NULL : &foc); 15261da177e4SLinus Torvalds 15274957faadSWilliam Allen Simpson if (tmp_opt.cookie_plus > 0 && 15284957faadSWilliam Allen Simpson tmp_opt.saw_tstamp && 15294957faadSWilliam Allen Simpson !tp->rx_opt.cookie_out_never && 15304957faadSWilliam Allen Simpson (sysctl_tcp_cookie_size > 0 || 15314957faadSWilliam Allen Simpson (tp->cookie_values != NULL && 15324957faadSWilliam Allen Simpson tp->cookie_values->cookie_desired > 0))) { 15334957faadSWilliam Allen Simpson u8 *c; 15344957faadSWilliam Allen Simpson u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS]; 15354957faadSWilliam Allen Simpson int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE; 15364957faadSWilliam Allen Simpson 15374957faadSWilliam Allen Simpson if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0) 15384957faadSWilliam Allen Simpson goto drop_and_release; 15394957faadSWilliam Allen Simpson 15404957faadSWilliam Allen Simpson /* Secret recipe starts with IP addresses */ 15410eae88f3SEric Dumazet *mess++ ^= (__force u32)daddr; 15420eae88f3SEric Dumazet *mess++ ^= (__force u32)saddr; 15434957faadSWilliam Allen Simpson 15444957faadSWilliam Allen Simpson /* plus variable length Initiator Cookie */ 15454957faadSWilliam Allen Simpson c = (u8 *)mess; 15464957faadSWilliam Allen Simpson while (l-- > 0) 15474957faadSWilliam Allen Simpson *c++ ^= *hash_location++; 15484957faadSWilliam Allen Simpson 1549a2a385d6SEric Dumazet want_cookie = false; /* not our kind of cookie */ 15504957faadSWilliam Allen Simpson tmp_ext.cookie_out_never = 0; /* false */ 15514957faadSWilliam Allen Simpson tmp_ext.cookie_plus = tmp_opt.cookie_plus; 15524957faadSWilliam Allen Simpson } else if (!tp->rx_opt.cookie_in_always) { 15534957faadSWilliam Allen Simpson /* redundant indications, but ensure initialization. 
*/ 15544957faadSWilliam Allen Simpson tmp_ext.cookie_out_never = 1; /* true */ 15554957faadSWilliam Allen Simpson tmp_ext.cookie_plus = 0; 15564957faadSWilliam Allen Simpson } else { 15574957faadSWilliam Allen Simpson goto drop_and_release; 15584957faadSWilliam Allen Simpson } 15594957faadSWilliam Allen Simpson tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always; 15601da177e4SLinus Torvalds 15614dfc2817SFlorian Westphal if (want_cookie && !tmp_opt.saw_tstamp) 15621da177e4SLinus Torvalds tcp_clear_options(&tmp_opt); 15631da177e4SLinus Torvalds 15641da177e4SLinus Torvalds tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; 15651da177e4SLinus Torvalds tcp_openreq_init(req, &tmp_opt, skb); 15661da177e4SLinus Torvalds 1567bb5b7c11SDavid S. Miller ireq = inet_rsk(req); 1568bb5b7c11SDavid S. Miller ireq->loc_addr = daddr; 1569bb5b7c11SDavid S. Miller ireq->rmt_addr = saddr; 1570bb5b7c11SDavid S. Miller ireq->no_srccheck = inet_sk(sk)->transparent; 15715dff747bSChristoph Paasch ireq->opt = tcp_v4_save_options(skb); 1572bb5b7c11SDavid S. Miller 1573284904aaSPaul Moore if (security_inet_conn_request(sk, skb, req)) 1574bb5b7c11SDavid S. Miller goto drop_and_free; 1575284904aaSPaul Moore 1576172d69e6SFlorian Westphal if (!want_cookie || tmp_opt.tstamp_ok) 1577bd14b1b2SEric Dumazet TCP_ECN_create_request(req, skb); 15781da177e4SLinus Torvalds 15791da177e4SLinus Torvalds if (want_cookie) { 15801da177e4SLinus Torvalds isn = cookie_v4_init_sequence(sk, skb, &req->mss); 1581172d69e6SFlorian Westphal req->cookie_ts = tmp_opt.tstamp_ok; 15821da177e4SLinus Torvalds } else if (!isn) { 15831da177e4SLinus Torvalds /* VJ's idea. We save last timestamp seen 15841da177e4SLinus Torvalds * from the destination in peer table, when entering 15851da177e4SLinus Torvalds * state TIME-WAIT, and check against it before 15861da177e4SLinus Torvalds * accepting new connection request. 
15871da177e4SLinus Torvalds * 15881da177e4SLinus Torvalds * If "isn" is not zero, this request hit alive 15891da177e4SLinus Torvalds * timewait bucket, so that all the necessary checks 15901da177e4SLinus Torvalds * are made in the function processing timewait state. 15911da177e4SLinus Torvalds */ 15921da177e4SLinus Torvalds if (tmp_opt.saw_tstamp && 1593295ff7edSArnaldo Carvalho de Melo tcp_death_row.sysctl_tw_recycle && 1594ba3f7f04SDavid S. Miller (dst = inet_csk_route_req(sk, &fl4, req)) != NULL && 159581166dd6SDavid S. Miller fl4.daddr == saddr) { 159681166dd6SDavid S. Miller if (!tcp_peer_is_proven(req, dst, true)) { 1597de0744afSPavel Emelyanov NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED); 15987cd04fa7SDenis V. Lunev goto drop_and_release; 15991da177e4SLinus Torvalds } 16001da177e4SLinus Torvalds } 16011da177e4SLinus Torvalds /* Kill the following clause, if you dislike this way. */ 16021da177e4SLinus Torvalds else if (!sysctl_tcp_syncookies && 1603463c84b9SArnaldo Carvalho de Melo (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < 16041da177e4SLinus Torvalds (sysctl_max_syn_backlog >> 2)) && 160581166dd6SDavid S. Miller !tcp_peer_is_proven(req, dst, false)) { 16061da177e4SLinus Torvalds /* Without syncookies last quarter of 16071da177e4SLinus Torvalds * backlog is filled with destinations, 16081da177e4SLinus Torvalds * proven to be alive. 16091da177e4SLinus Torvalds * It means that we continue to communicate 16101da177e4SLinus Torvalds * to destinations, already remembered 16111da177e4SLinus Torvalds * to the moment of synflood. 16121da177e4SLinus Torvalds */ 1613afd46503SJoe Perches LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"), 1614673d57e7SHarvey Harrison &saddr, ntohs(tcp_hdr(skb)->source)); 16157cd04fa7SDenis V. 
Lunev goto drop_and_release; 16161da177e4SLinus Torvalds } 16171da177e4SLinus Torvalds 1618a94f723dSGerrit Renker isn = tcp_v4_init_sequence(skb); 16191da177e4SLinus Torvalds } 16202e6599cbSArnaldo Carvalho de Melo tcp_rsk(req)->snt_isn = isn; 16211da177e4SLinus Torvalds 1622168a8f58SJerry Chu if (dst == NULL) { 1623168a8f58SJerry Chu dst = inet_csk_route_req(sk, &fl4, req); 1624168a8f58SJerry Chu if (dst == NULL) 1625168a8f58SJerry Chu goto drop_and_free; 1626168a8f58SJerry Chu } 1627168a8f58SJerry Chu do_fastopen = tcp_fastopen_check(sk, skb, req, &foc, &valid_foc); 1628168a8f58SJerry Chu 1629168a8f58SJerry Chu /* We don't call tcp_v4_send_synack() directly because we need 1630168a8f58SJerry Chu * to make sure a child socket can be created successfully before 1631168a8f58SJerry Chu * sending back synack! 1632168a8f58SJerry Chu * 1633168a8f58SJerry Chu * XXX (TFO) - Ideally one would simply call tcp_v4_send_synack() 1634168a8f58SJerry Chu * (or better yet, call tcp_send_synack() in the child context 1635168a8f58SJerry Chu * directly, but will have to fix bunch of other code first) 1636168a8f58SJerry Chu * after syn_recv_sock() except one will need to first fix the 1637168a8f58SJerry Chu * latter to remove its dependency on the current implementation 1638168a8f58SJerry Chu * of tcp_v4_send_synack()->tcp_select_initial_window(). 1639168a8f58SJerry Chu */ 1640168a8f58SJerry Chu skb_synack = tcp_make_synack(sk, dst, req, 1641fff32699SEric Dumazet (struct request_values *)&tmp_ext, 1642168a8f58SJerry Chu fastopen_cookie_present(&valid_foc) ? 
&valid_foc : NULL); 1643168a8f58SJerry Chu 1644168a8f58SJerry Chu if (skb_synack) { 1645168a8f58SJerry Chu __tcp_v4_send_check(skb_synack, ireq->loc_addr, ireq->rmt_addr); 1646168a8f58SJerry Chu skb_set_queue_mapping(skb_synack, skb_get_queue_mapping(skb)); 1647168a8f58SJerry Chu } else 16481da177e4SLinus Torvalds goto drop_and_free; 16491da177e4SLinus Torvalds 1650168a8f58SJerry Chu if (likely(!do_fastopen)) { 1651168a8f58SJerry Chu int err; 1652168a8f58SJerry Chu err = ip_build_and_send_pkt(skb_synack, sk, ireq->loc_addr, 1653168a8f58SJerry Chu ireq->rmt_addr, ireq->opt); 1654168a8f58SJerry Chu err = net_xmit_eval(err); 1655168a8f58SJerry Chu if (err || want_cookie) 1656168a8f58SJerry Chu goto drop_and_free; 1657168a8f58SJerry Chu 1658016818d0SNeal Cardwell tcp_rsk(req)->snt_synack = tcp_time_stamp; 1659168a8f58SJerry Chu tcp_rsk(req)->listener = NULL; 1660168a8f58SJerry Chu /* Add the request_sock to the SYN table */ 16613f421baaSArnaldo Carvalho de Melo inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); 1662168a8f58SJerry Chu if (fastopen_cookie_present(&foc) && foc.len != 0) 1663168a8f58SJerry Chu NET_INC_STATS_BH(sock_net(sk), 1664168a8f58SJerry Chu LINUX_MIB_TCPFASTOPENPASSIVEFAIL); 1665168a8f58SJerry Chu } else if (tcp_v4_conn_req_fastopen(sk, skb, skb_synack, req, 1666168a8f58SJerry Chu (struct request_values *)&tmp_ext)) 1667168a8f58SJerry Chu goto drop_and_free; 1668168a8f58SJerry Chu 16691da177e4SLinus Torvalds return 0; 16701da177e4SLinus Torvalds 16717cd04fa7SDenis V. Lunev drop_and_release: 16727cd04fa7SDenis V. 
Lunev dst_release(dst); 16731da177e4SLinus Torvalds drop_and_free: 167460236fddSArnaldo Carvalho de Melo reqsk_free(req); 16751da177e4SLinus Torvalds drop: 16761da177e4SLinus Torvalds return 0; 16771da177e4SLinus Torvalds } 16784bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_conn_request); 16791da177e4SLinus Torvalds 16801da177e4SLinus Torvalds 16811da177e4SLinus Torvalds /* 16821da177e4SLinus Torvalds * The three way handshake has completed - we got a valid synack - 16831da177e4SLinus Torvalds * now create the new socket. 16841da177e4SLinus Torvalds */ 16851da177e4SLinus Torvalds struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, 168660236fddSArnaldo Carvalho de Melo struct request_sock *req, 16871da177e4SLinus Torvalds struct dst_entry *dst) 16881da177e4SLinus Torvalds { 16892e6599cbSArnaldo Carvalho de Melo struct inet_request_sock *ireq; 16901da177e4SLinus Torvalds struct inet_sock *newinet; 16911da177e4SLinus Torvalds struct tcp_sock *newtp; 16921da177e4SLinus Torvalds struct sock *newsk; 1693cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 1694cfb6eeb4SYOSHIFUJI Hideaki struct tcp_md5sig_key *key; 1695cfb6eeb4SYOSHIFUJI Hideaki #endif 1696f6d8bd05SEric Dumazet struct ip_options_rcu *inet_opt; 16971da177e4SLinus Torvalds 16981da177e4SLinus Torvalds if (sk_acceptq_is_full(sk)) 16991da177e4SLinus Torvalds goto exit_overflow; 17001da177e4SLinus Torvalds 17011da177e4SLinus Torvalds newsk = tcp_create_openreq_child(sk, req, skb); 17021da177e4SLinus Torvalds if (!newsk) 1703093d2823SBalazs Scheidler goto exit_nonewsk; 17041da177e4SLinus Torvalds 1705bcd76111SHerbert Xu newsk->sk_gso_type = SKB_GSO_TCPV4; 1706fae6ef87SNeal Cardwell inet_sk_rx_dst_set(newsk, skb); 17071da177e4SLinus Torvalds 17081da177e4SLinus Torvalds newtp = tcp_sk(newsk); 17091da177e4SLinus Torvalds newinet = inet_sk(newsk); 17102e6599cbSArnaldo Carvalho de Melo ireq = inet_rsk(req); 1711c720c7e8SEric Dumazet newinet->inet_daddr = ireq->rmt_addr; 1712c720c7e8SEric Dumazet 
newinet->inet_rcv_saddr = ireq->loc_addr; 1713c720c7e8SEric Dumazet newinet->inet_saddr = ireq->loc_addr; 1714f6d8bd05SEric Dumazet inet_opt = ireq->opt; 1715f6d8bd05SEric Dumazet rcu_assign_pointer(newinet->inet_opt, inet_opt); 17162e6599cbSArnaldo Carvalho de Melo ireq->opt = NULL; 1717463c84b9SArnaldo Carvalho de Melo newinet->mc_index = inet_iif(skb); 1718eddc9ec5SArnaldo Carvalho de Melo newinet->mc_ttl = ip_hdr(skb)->ttl; 17194c507d28SJiri Benc newinet->rcv_tos = ip_hdr(skb)->tos; 1720d83d8461SArnaldo Carvalho de Melo inet_csk(newsk)->icsk_ext_hdr_len = 0; 1721f6d8bd05SEric Dumazet if (inet_opt) 1722f6d8bd05SEric Dumazet inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen; 1723c720c7e8SEric Dumazet newinet->inet_id = newtp->write_seq ^ jiffies; 17241da177e4SLinus Torvalds 1725dfd25fffSEric Dumazet if (!dst) { 1726dfd25fffSEric Dumazet dst = inet_csk_route_child_sock(sk, newsk, req); 1727dfd25fffSEric Dumazet if (!dst) 17280e734419SDavid S. Miller goto put_and_exit; 1729dfd25fffSEric Dumazet } else { 1730dfd25fffSEric Dumazet /* syncookie case : see end of cookie_v4_check() */ 1731dfd25fffSEric Dumazet } 17320e734419SDavid S. Miller sk_setup_caps(newsk, dst); 17330e734419SDavid S. Miller 17345d424d5aSJohn Heffner tcp_mtup_init(newsk); 17351da177e4SLinus Torvalds tcp_sync_mss(newsk, dst_mtu(dst)); 17360dbaee3bSDavid S. 
Miller newtp->advmss = dst_metric_advmss(dst); 1737f5fff5dcSTom Quetchenbach if (tcp_sk(sk)->rx_opt.user_mss && 1738f5fff5dcSTom Quetchenbach tcp_sk(sk)->rx_opt.user_mss < newtp->advmss) 1739f5fff5dcSTom Quetchenbach newtp->advmss = tcp_sk(sk)->rx_opt.user_mss; 1740f5fff5dcSTom Quetchenbach 17411da177e4SLinus Torvalds tcp_initialize_rcv_mss(newsk); 1742623df484SNeal Cardwell tcp_synack_rtt_meas(newsk, req); 17439ad7c049SJerry Chu newtp->total_retrans = req->retrans; 17441da177e4SLinus Torvalds 1745cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 1746cfb6eeb4SYOSHIFUJI Hideaki /* Copy over the MD5 key from the original socket */ 1747a915da9bSEric Dumazet key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr, 1748a915da9bSEric Dumazet AF_INET); 1749c720c7e8SEric Dumazet if (key != NULL) { 1750cfb6eeb4SYOSHIFUJI Hideaki /* 1751cfb6eeb4SYOSHIFUJI Hideaki * We're using one, so create a matching key 1752cfb6eeb4SYOSHIFUJI Hideaki * on the newsk structure. If we fail to get 1753cfb6eeb4SYOSHIFUJI Hideaki * memory, then we end up not copying the key 1754cfb6eeb4SYOSHIFUJI Hideaki * across. Shucks. 1755cfb6eeb4SYOSHIFUJI Hideaki */ 1756a915da9bSEric Dumazet tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr, 1757a915da9bSEric Dumazet AF_INET, key->key, key->keylen, GFP_ATOMIC); 1758a465419bSEric Dumazet sk_nocaps_add(newsk, NETIF_F_GSO_MASK); 1759cfb6eeb4SYOSHIFUJI Hideaki } 1760cfb6eeb4SYOSHIFUJI Hideaki #endif 1761cfb6eeb4SYOSHIFUJI Hideaki 17620e734419SDavid S. Miller if (__inet_inherit_port(sk, newsk) < 0) 17630e734419SDavid S. 
Miller goto put_and_exit; 17649327f705SEric Dumazet __inet_hash_nolisten(newsk, NULL); 17651da177e4SLinus Torvalds 17661da177e4SLinus Torvalds return newsk; 17671da177e4SLinus Torvalds 17681da177e4SLinus Torvalds exit_overflow: 1769de0744afSPavel Emelyanov NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); 1770093d2823SBalazs Scheidler exit_nonewsk: 1771093d2823SBalazs Scheidler dst_release(dst); 17721da177e4SLinus Torvalds exit: 1773de0744afSPavel Emelyanov NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); 17741da177e4SLinus Torvalds return NULL; 17750e734419SDavid S. Miller put_and_exit: 1776709e8697SEric Dumazet tcp_clear_xmit_timers(newsk); 1777d8a6e65fSEric Dumazet tcp_cleanup_congestion_control(newsk); 1778918eb399SEric Dumazet bh_unlock_sock(newsk); 17790e734419SDavid S. Miller sock_put(newsk); 17800e734419SDavid S. Miller goto exit; 17811da177e4SLinus Torvalds } 17824bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_syn_recv_sock); 17831da177e4SLinus Torvalds 17841da177e4SLinus Torvalds static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) 17851da177e4SLinus Torvalds { 1786aa8223c7SArnaldo Carvalho de Melo struct tcphdr *th = tcp_hdr(skb); 1787eddc9ec5SArnaldo Carvalho de Melo const struct iphdr *iph = ip_hdr(skb); 17881da177e4SLinus Torvalds struct sock *nsk; 178960236fddSArnaldo Carvalho de Melo struct request_sock **prev; 17901da177e4SLinus Torvalds /* Find possible connection requests. 
*/ 1791463c84b9SArnaldo Carvalho de Melo struct request_sock *req = inet_csk_search_req(sk, &prev, th->source, 17921da177e4SLinus Torvalds iph->saddr, iph->daddr); 17931da177e4SLinus Torvalds if (req) 17948336886fSJerry Chu return tcp_check_req(sk, skb, req, prev, false); 17951da177e4SLinus Torvalds 17963b1e0a65SYOSHIFUJI Hideaki nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr, 1797c67499c0SPavel Emelyanov th->source, iph->daddr, th->dest, inet_iif(skb)); 17981da177e4SLinus Torvalds 17991da177e4SLinus Torvalds if (nsk) { 18001da177e4SLinus Torvalds if (nsk->sk_state != TCP_TIME_WAIT) { 18011da177e4SLinus Torvalds bh_lock_sock(nsk); 18021da177e4SLinus Torvalds return nsk; 18031da177e4SLinus Torvalds } 18049469c7b4SYOSHIFUJI Hideaki inet_twsk_put(inet_twsk(nsk)); 18051da177e4SLinus Torvalds return NULL; 18061da177e4SLinus Torvalds } 18071da177e4SLinus Torvalds 18081da177e4SLinus Torvalds #ifdef CONFIG_SYN_COOKIES 1809af9b4738SFlorian Westphal if (!th->syn) 18101da177e4SLinus Torvalds sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt)); 18111da177e4SLinus Torvalds #endif 18121da177e4SLinus Torvalds return sk; 18131da177e4SLinus Torvalds } 18141da177e4SLinus Torvalds 1815b51655b9SAl Viro static __sum16 tcp_v4_checksum_init(struct sk_buff *skb) 18161da177e4SLinus Torvalds { 1817eddc9ec5SArnaldo Carvalho de Melo const struct iphdr *iph = ip_hdr(skb); 1818eddc9ec5SArnaldo Carvalho de Melo 181984fa7933SPatrick McHardy if (skb->ip_summed == CHECKSUM_COMPLETE) { 1820eddc9ec5SArnaldo Carvalho de Melo if (!tcp_v4_check(skb->len, iph->saddr, 1821eddc9ec5SArnaldo Carvalho de Melo iph->daddr, skb->csum)) { 18221da177e4SLinus Torvalds skb->ip_summed = CHECKSUM_UNNECESSARY; 1823fb286bb2SHerbert Xu return 0; 1824fb286bb2SHerbert Xu } 1825fb286bb2SHerbert Xu } 1826fb286bb2SHerbert Xu 1827eddc9ec5SArnaldo Carvalho de Melo skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, 1828fb286bb2SHerbert Xu skb->len, IPPROTO_TCP, 0); 1829fb286bb2SHerbert Xu 
1830fb286bb2SHerbert Xu if (skb->len <= 76) { 1831fb286bb2SHerbert Xu return __skb_checksum_complete(skb); 18321da177e4SLinus Torvalds } 18331da177e4SLinus Torvalds return 0; 18341da177e4SLinus Torvalds } 18351da177e4SLinus Torvalds 18361da177e4SLinus Torvalds 18371da177e4SLinus Torvalds /* The socket must have it's spinlock held when we get 18381da177e4SLinus Torvalds * here. 18391da177e4SLinus Torvalds * 18401da177e4SLinus Torvalds * We have a potential double-lock case here, so even when 18411da177e4SLinus Torvalds * doing backlog processing we use the BH locking scheme. 18421da177e4SLinus Torvalds * This is because we cannot sleep with the original spinlock 18431da177e4SLinus Torvalds * held. 18441da177e4SLinus Torvalds */ 18451da177e4SLinus Torvalds int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) 18461da177e4SLinus Torvalds { 1847cfb6eeb4SYOSHIFUJI Hideaki struct sock *rsk; 1848cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 1849cfb6eeb4SYOSHIFUJI Hideaki /* 1850cfb6eeb4SYOSHIFUJI Hideaki * We really want to reject the packet as early as possible 1851cfb6eeb4SYOSHIFUJI Hideaki * if: 1852cfb6eeb4SYOSHIFUJI Hideaki * o We're expecting an MD5'd packet and this is no MD5 tcp option 1853cfb6eeb4SYOSHIFUJI Hideaki * o There is an MD5 option and we're not expecting one 1854cfb6eeb4SYOSHIFUJI Hideaki */ 1855cfb6eeb4SYOSHIFUJI Hideaki if (tcp_v4_inbound_md5_hash(sk, skb)) 1856cfb6eeb4SYOSHIFUJI Hideaki goto discard; 1857cfb6eeb4SYOSHIFUJI Hideaki #endif 1858cfb6eeb4SYOSHIFUJI Hideaki 18591da177e4SLinus Torvalds if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ 186092101b3bSDavid S. Miller struct dst_entry *dst = sk->sk_rx_dst; 1861404e0a8bSEric Dumazet 1862404e0a8bSEric Dumazet sock_rps_save_rxhash(sk, skb); 1863404e0a8bSEric Dumazet if (dst) { 1864505fbcf0SEric Dumazet if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif || 1865505fbcf0SEric Dumazet dst->ops->check(dst, 0) == NULL) { 186692101b3bSDavid S. Miller dst_release(dst); 186792101b3bSDavid S. 
Miller sk->sk_rx_dst = NULL; 186892101b3bSDavid S. Miller } 186992101b3bSDavid S. Miller } 1870aa8223c7SArnaldo Carvalho de Melo if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) { 1871cfb6eeb4SYOSHIFUJI Hideaki rsk = sk; 18721da177e4SLinus Torvalds goto reset; 1873cfb6eeb4SYOSHIFUJI Hideaki } 18741da177e4SLinus Torvalds return 0; 18751da177e4SLinus Torvalds } 18761da177e4SLinus Torvalds 1877ab6a5bb6SArnaldo Carvalho de Melo if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb)) 18781da177e4SLinus Torvalds goto csum_err; 18791da177e4SLinus Torvalds 18801da177e4SLinus Torvalds if (sk->sk_state == TCP_LISTEN) { 18811da177e4SLinus Torvalds struct sock *nsk = tcp_v4_hnd_req(sk, skb); 18821da177e4SLinus Torvalds if (!nsk) 18831da177e4SLinus Torvalds goto discard; 18841da177e4SLinus Torvalds 18851da177e4SLinus Torvalds if (nsk != sk) { 1886bdeab991STom Herbert sock_rps_save_rxhash(nsk, skb); 1887cfb6eeb4SYOSHIFUJI Hideaki if (tcp_child_process(sk, nsk, skb)) { 1888cfb6eeb4SYOSHIFUJI Hideaki rsk = nsk; 18891da177e4SLinus Torvalds goto reset; 1890cfb6eeb4SYOSHIFUJI Hideaki } 18911da177e4SLinus Torvalds return 0; 18921da177e4SLinus Torvalds } 1893ca55158cSEric Dumazet } else 1894bdeab991STom Herbert sock_rps_save_rxhash(sk, skb); 1895ca55158cSEric Dumazet 1896aa8223c7SArnaldo Carvalho de Melo if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) { 1897cfb6eeb4SYOSHIFUJI Hideaki rsk = sk; 18981da177e4SLinus Torvalds goto reset; 1899cfb6eeb4SYOSHIFUJI Hideaki } 19001da177e4SLinus Torvalds return 0; 19011da177e4SLinus Torvalds 19021da177e4SLinus Torvalds reset: 1903cfb6eeb4SYOSHIFUJI Hideaki tcp_v4_send_reset(rsk, skb); 19041da177e4SLinus Torvalds discard: 19051da177e4SLinus Torvalds kfree_skb(skb); 19061da177e4SLinus Torvalds /* Be careful here. If this function gets more complicated and 19071da177e4SLinus Torvalds * gcc suffers from register pressure on the x86, sk (in %ebx) 19081da177e4SLinus Torvalds * might be destroyed here. 
This current version compiles correctly, 19091da177e4SLinus Torvalds * but you have been warned. 19101da177e4SLinus Torvalds */ 19111da177e4SLinus Torvalds return 0; 19121da177e4SLinus Torvalds 19131da177e4SLinus Torvalds csum_err: 191463231bddSPavel Emelyanov TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); 19151da177e4SLinus Torvalds goto discard; 19161da177e4SLinus Torvalds } 19174bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_do_rcv); 19181da177e4SLinus Torvalds 1919160eb5a6SDavid S. Miller void tcp_v4_early_demux(struct sk_buff *skb) 192041063e9dSDavid S. Miller { 192141063e9dSDavid S. Miller const struct iphdr *iph; 192241063e9dSDavid S. Miller const struct tcphdr *th; 192341063e9dSDavid S. Miller struct sock *sk; 192441063e9dSDavid S. Miller 192541063e9dSDavid S. Miller if (skb->pkt_type != PACKET_HOST) 1926160eb5a6SDavid S. Miller return; 192741063e9dSDavid S. Miller 192845f00f99SEric Dumazet if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr))) 1929160eb5a6SDavid S. Miller return; 193041063e9dSDavid S. Miller 193141063e9dSDavid S. Miller iph = ip_hdr(skb); 193245f00f99SEric Dumazet th = tcp_hdr(skb); 193341063e9dSDavid S. Miller 193441063e9dSDavid S. Miller if (th->doff < sizeof(struct tcphdr) / 4) 1935160eb5a6SDavid S. Miller return; 193641063e9dSDavid S. Miller 193745f00f99SEric Dumazet sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo, 193841063e9dSDavid S. Miller iph->saddr, th->source, 19397011d085SVijay Subramanian iph->daddr, ntohs(th->dest), 19409cb429d6SEric Dumazet skb->skb_iif); 194141063e9dSDavid S. Miller if (sk) { 194241063e9dSDavid S. Miller skb->sk = sk; 194341063e9dSDavid S. Miller skb->destructor = sock_edemux; 194441063e9dSDavid S. Miller if (sk->sk_state != TCP_TIME_WAIT) { 194541063e9dSDavid S. Miller struct dst_entry *dst = sk->sk_rx_dst; 1946505fbcf0SEric Dumazet 194741063e9dSDavid S. Miller if (dst) 194841063e9dSDavid S. Miller dst = dst_check(dst, 0); 194992101b3bSDavid S. 
Miller if (dst && 1950505fbcf0SEric Dumazet inet_sk(sk)->rx_dst_ifindex == skb->skb_iif) 195141063e9dSDavid S. Miller skb_dst_set_noref(skb, dst); 195241063e9dSDavid S. Miller } 195341063e9dSDavid S. Miller } 195441063e9dSDavid S. Miller } 195541063e9dSDavid S. Miller 19561da177e4SLinus Torvalds /* 19571da177e4SLinus Torvalds * From tcp_input.c 19581da177e4SLinus Torvalds */ 19591da177e4SLinus Torvalds 19601da177e4SLinus Torvalds int tcp_v4_rcv(struct sk_buff *skb) 19611da177e4SLinus Torvalds { 1962eddc9ec5SArnaldo Carvalho de Melo const struct iphdr *iph; 1963cf533ea5SEric Dumazet const struct tcphdr *th; 19641da177e4SLinus Torvalds struct sock *sk; 19651da177e4SLinus Torvalds int ret; 1966a86b1e30SPavel Emelyanov struct net *net = dev_net(skb->dev); 19671da177e4SLinus Torvalds 19681da177e4SLinus Torvalds if (skb->pkt_type != PACKET_HOST) 19691da177e4SLinus Torvalds goto discard_it; 19701da177e4SLinus Torvalds 19711da177e4SLinus Torvalds /* Count it even if it's bad */ 197263231bddSPavel Emelyanov TCP_INC_STATS_BH(net, TCP_MIB_INSEGS); 19731da177e4SLinus Torvalds 19741da177e4SLinus Torvalds if (!pskb_may_pull(skb, sizeof(struct tcphdr))) 19751da177e4SLinus Torvalds goto discard_it; 19761da177e4SLinus Torvalds 1977aa8223c7SArnaldo Carvalho de Melo th = tcp_hdr(skb); 19781da177e4SLinus Torvalds 19791da177e4SLinus Torvalds if (th->doff < sizeof(struct tcphdr) / 4) 19801da177e4SLinus Torvalds goto bad_packet; 19811da177e4SLinus Torvalds if (!pskb_may_pull(skb, th->doff * 4)) 19821da177e4SLinus Torvalds goto discard_it; 19831da177e4SLinus Torvalds 19841da177e4SLinus Torvalds /* An explanation is required here, I think. 19851da177e4SLinus Torvalds * Packet length and doff are validated by header prediction, 1986caa20d9aSStephen Hemminger * provided case of th->doff==0 is eliminated. 19871da177e4SLinus Torvalds * So, we defer the checks. 
*/ 198860476372SHerbert Xu if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb)) 19891da177e4SLinus Torvalds goto bad_packet; 19901da177e4SLinus Torvalds 1991aa8223c7SArnaldo Carvalho de Melo th = tcp_hdr(skb); 1992eddc9ec5SArnaldo Carvalho de Melo iph = ip_hdr(skb); 19931da177e4SLinus Torvalds TCP_SKB_CB(skb)->seq = ntohl(th->seq); 19941da177e4SLinus Torvalds TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + 19951da177e4SLinus Torvalds skb->len - th->doff * 4); 19961da177e4SLinus Torvalds TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); 19971da177e4SLinus Torvalds TCP_SKB_CB(skb)->when = 0; 1998b82d1bb4SEric Dumazet TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph); 19991da177e4SLinus Torvalds TCP_SKB_CB(skb)->sacked = 0; 20001da177e4SLinus Torvalds 20019a1f27c4SArnaldo Carvalho de Melo sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest); 20021da177e4SLinus Torvalds if (!sk) 20031da177e4SLinus Torvalds goto no_tcp_socket; 20041da177e4SLinus Torvalds 2005bb134d5dSEric Dumazet process: 2006bb134d5dSEric Dumazet if (sk->sk_state == TCP_TIME_WAIT) 2007bb134d5dSEric Dumazet goto do_time_wait; 2008bb134d5dSEric Dumazet 20096cce09f8SEric Dumazet if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) { 20106cce09f8SEric Dumazet NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP); 2011d218d111SStephen Hemminger goto discard_and_relse; 20126cce09f8SEric Dumazet } 2013d218d111SStephen Hemminger 20141da177e4SLinus Torvalds if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) 20151da177e4SLinus Torvalds goto discard_and_relse; 2016b59c2701SPatrick McHardy nf_reset(skb); 20171da177e4SLinus Torvalds 2018fda9ef5dSDmitry Mishin if (sk_filter(sk, skb)) 20191da177e4SLinus Torvalds goto discard_and_relse; 20201da177e4SLinus Torvalds 20211da177e4SLinus Torvalds skb->dev = NULL; 20221da177e4SLinus Torvalds 2023c6366184SIngo Molnar bh_lock_sock_nested(sk); 20241da177e4SLinus Torvalds ret = 0; 20251da177e4SLinus Torvalds if (!sock_owned_by_user(sk)) { 
20261a2449a8SChris Leech #ifdef CONFIG_NET_DMA 20271a2449a8SChris Leech struct tcp_sock *tp = tcp_sk(sk); 20281a2449a8SChris Leech if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list) 2029a2bd1140SDave Jiang tp->ucopy.dma_chan = net_dma_find_channel(); 20301a2449a8SChris Leech if (tp->ucopy.dma_chan) 20311a2449a8SChris Leech ret = tcp_v4_do_rcv(sk, skb); 20321a2449a8SChris Leech else 20331a2449a8SChris Leech #endif 20341a2449a8SChris Leech { 20351da177e4SLinus Torvalds if (!tcp_prequeue(sk, skb)) 20361da177e4SLinus Torvalds ret = tcp_v4_do_rcv(sk, skb); 20371a2449a8SChris Leech } 2038da882c1fSEric Dumazet } else if (unlikely(sk_add_backlog(sk, skb, 2039da882c1fSEric Dumazet sk->sk_rcvbuf + sk->sk_sndbuf))) { 20406b03a53aSZhu Yi bh_unlock_sock(sk); 20416cce09f8SEric Dumazet NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP); 20426b03a53aSZhu Yi goto discard_and_relse; 20436b03a53aSZhu Yi } 20441da177e4SLinus Torvalds bh_unlock_sock(sk); 20451da177e4SLinus Torvalds 20461da177e4SLinus Torvalds sock_put(sk); 20471da177e4SLinus Torvalds 20481da177e4SLinus Torvalds return ret; 20491da177e4SLinus Torvalds 20501da177e4SLinus Torvalds no_tcp_socket: 20511da177e4SLinus Torvalds if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) 20521da177e4SLinus Torvalds goto discard_it; 20531da177e4SLinus Torvalds 20541da177e4SLinus Torvalds if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) { 20551da177e4SLinus Torvalds bad_packet: 205663231bddSPavel Emelyanov TCP_INC_STATS_BH(net, TCP_MIB_INERRS); 20571da177e4SLinus Torvalds } else { 2058cfb6eeb4SYOSHIFUJI Hideaki tcp_v4_send_reset(NULL, skb); 20591da177e4SLinus Torvalds } 20601da177e4SLinus Torvalds 20611da177e4SLinus Torvalds discard_it: 20621da177e4SLinus Torvalds /* Discard frame. 
*/ 20631da177e4SLinus Torvalds kfree_skb(skb); 20641da177e4SLinus Torvalds return 0; 20651da177e4SLinus Torvalds 20661da177e4SLinus Torvalds discard_and_relse: 20671da177e4SLinus Torvalds sock_put(sk); 20681da177e4SLinus Torvalds goto discard_it; 20691da177e4SLinus Torvalds 20701da177e4SLinus Torvalds do_time_wait: 20711da177e4SLinus Torvalds if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { 20729469c7b4SYOSHIFUJI Hideaki inet_twsk_put(inet_twsk(sk)); 20731da177e4SLinus Torvalds goto discard_it; 20741da177e4SLinus Torvalds } 20751da177e4SLinus Torvalds 20761da177e4SLinus Torvalds if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) { 207763231bddSPavel Emelyanov TCP_INC_STATS_BH(net, TCP_MIB_INERRS); 20789469c7b4SYOSHIFUJI Hideaki inet_twsk_put(inet_twsk(sk)); 20791da177e4SLinus Torvalds goto discard_it; 20801da177e4SLinus Torvalds } 20819469c7b4SYOSHIFUJI Hideaki switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { 20821da177e4SLinus Torvalds case TCP_TW_SYN: { 2083c346dca1SYOSHIFUJI Hideaki struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev), 2084c67499c0SPavel Emelyanov &tcp_hashinfo, 2085eddc9ec5SArnaldo Carvalho de Melo iph->daddr, th->dest, 2086463c84b9SArnaldo Carvalho de Melo inet_iif(skb)); 20871da177e4SLinus Torvalds if (sk2) { 20889469c7b4SYOSHIFUJI Hideaki inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row); 20899469c7b4SYOSHIFUJI Hideaki inet_twsk_put(inet_twsk(sk)); 20901da177e4SLinus Torvalds sk = sk2; 20911da177e4SLinus Torvalds goto process; 20921da177e4SLinus Torvalds } 20931da177e4SLinus Torvalds /* Fall through to ACK */ 20941da177e4SLinus Torvalds } 20951da177e4SLinus Torvalds case TCP_TW_ACK: 20961da177e4SLinus Torvalds tcp_v4_timewait_ack(sk, skb); 20971da177e4SLinus Torvalds break; 20981da177e4SLinus Torvalds case TCP_TW_RST: 20991da177e4SLinus Torvalds goto no_tcp_socket; 21001da177e4SLinus Torvalds case TCP_TW_SUCCESS:; 21011da177e4SLinus Torvalds } 21021da177e4SLinus Torvalds goto discard_it; 21031da177e4SLinus 
Torvalds } 21041da177e4SLinus Torvalds 2105ccb7c410SDavid S. Miller static struct timewait_sock_ops tcp_timewait_sock_ops = { 2106ccb7c410SDavid S. Miller .twsk_obj_size = sizeof(struct tcp_timewait_sock), 2107ccb7c410SDavid S. Miller .twsk_unique = tcp_twsk_unique, 2108ccb7c410SDavid S. Miller .twsk_destructor= tcp_twsk_destructor, 2109ccb7c410SDavid S. Miller }; 21101da177e4SLinus Torvalds 211163d02d15SEric Dumazet void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) 21125d299f3dSEric Dumazet { 21135d299f3dSEric Dumazet struct dst_entry *dst = skb_dst(skb); 21145d299f3dSEric Dumazet 21155d299f3dSEric Dumazet dst_hold(dst); 21165d299f3dSEric Dumazet sk->sk_rx_dst = dst; 21175d299f3dSEric Dumazet inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; 21185d299f3dSEric Dumazet } 211963d02d15SEric Dumazet EXPORT_SYMBOL(inet_sk_rx_dst_set); 21205d299f3dSEric Dumazet 21213b401a81SStephen Hemminger const struct inet_connection_sock_af_ops ipv4_specific = { 21221da177e4SLinus Torvalds .queue_xmit = ip_queue_xmit, 21231da177e4SLinus Torvalds .send_check = tcp_v4_send_check, 212432519f11SArnaldo Carvalho de Melo .rebuild_header = inet_sk_rebuild_header, 21255d299f3dSEric Dumazet .sk_rx_dst_set = inet_sk_rx_dst_set, 21261da177e4SLinus Torvalds .conn_request = tcp_v4_conn_request, 21271da177e4SLinus Torvalds .syn_recv_sock = tcp_v4_syn_recv_sock, 21281da177e4SLinus Torvalds .net_header_len = sizeof(struct iphdr), 21291da177e4SLinus Torvalds .setsockopt = ip_setsockopt, 21301da177e4SLinus Torvalds .getsockopt = ip_getsockopt, 2131543d9cfeSArnaldo Carvalho de Melo .addr2sockaddr = inet_csk_addr2sockaddr, 2132543d9cfeSArnaldo Carvalho de Melo .sockaddr_len = sizeof(struct sockaddr_in), 2133ab1e0a13SArnaldo Carvalho de Melo .bind_conflict = inet_csk_bind_conflict, 21343fdadf7dSDmitry Mishin #ifdef CONFIG_COMPAT 21353fdadf7dSDmitry Mishin .compat_setsockopt = compat_ip_setsockopt, 21363fdadf7dSDmitry Mishin .compat_getsockopt = compat_ip_getsockopt, 21373fdadf7dSDmitry Mishin 
#endif 21381da177e4SLinus Torvalds }; 21394bc2f18bSEric Dumazet EXPORT_SYMBOL(ipv4_specific); 21401da177e4SLinus Torvalds 2141cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 2142b2e4b3deSStephen Hemminger static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = { 2143cfb6eeb4SYOSHIFUJI Hideaki .md5_lookup = tcp_v4_md5_lookup, 214449a72dfbSAdam Langley .calc_md5_hash = tcp_v4_md5_hash_skb, 2145cfb6eeb4SYOSHIFUJI Hideaki .md5_parse = tcp_v4_parse_md5_keys, 2146cfb6eeb4SYOSHIFUJI Hideaki }; 2147b6332e6cSAndrew Morton #endif 2148cfb6eeb4SYOSHIFUJI Hideaki 21491da177e4SLinus Torvalds /* NOTE: A lot of things set to zero explicitly by call to 21501da177e4SLinus Torvalds * sk_alloc() so need not be done here. 21511da177e4SLinus Torvalds */ 21521da177e4SLinus Torvalds static int tcp_v4_init_sock(struct sock *sk) 21531da177e4SLinus Torvalds { 21546687e988SArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(sk); 21551da177e4SLinus Torvalds 2156900f65d3SNeal Cardwell tcp_init_sock(sk); 21571da177e4SLinus Torvalds 21588292a17aSArnaldo Carvalho de Melo icsk->icsk_af_ops = &ipv4_specific; 2159900f65d3SNeal Cardwell 2160cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 2161ac807fa8SDavid S. Miller tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific; 2162cfb6eeb4SYOSHIFUJI Hideaki #endif 21631da177e4SLinus Torvalds 21641da177e4SLinus Torvalds return 0; 21651da177e4SLinus Torvalds } 21661da177e4SLinus Torvalds 21677d06b2e0SBrian Haley void tcp_v4_destroy_sock(struct sock *sk) 21681da177e4SLinus Torvalds { 21691da177e4SLinus Torvalds struct tcp_sock *tp = tcp_sk(sk); 21701da177e4SLinus Torvalds 21711da177e4SLinus Torvalds tcp_clear_xmit_timers(sk); 21721da177e4SLinus Torvalds 21736687e988SArnaldo Carvalho de Melo tcp_cleanup_congestion_control(sk); 2174317a76f9SStephen Hemminger 21751da177e4SLinus Torvalds /* Cleanup up the write buffer. */ 2176fe067e8aSDavid S. 
Miller tcp_write_queue_purge(sk); 21771da177e4SLinus Torvalds 21781da177e4SLinus Torvalds /* Cleans up our, hopefully empty, out_of_order_queue. */ 21791da177e4SLinus Torvalds __skb_queue_purge(&tp->out_of_order_queue); 21801da177e4SLinus Torvalds 2181cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 2182cfb6eeb4SYOSHIFUJI Hideaki /* Clean up the MD5 key list, if any */ 2183cfb6eeb4SYOSHIFUJI Hideaki if (tp->md5sig_info) { 2184a915da9bSEric Dumazet tcp_clear_md5_list(sk); 2185a8afca03SEric Dumazet kfree_rcu(tp->md5sig_info, rcu); 2186cfb6eeb4SYOSHIFUJI Hideaki tp->md5sig_info = NULL; 2187cfb6eeb4SYOSHIFUJI Hideaki } 2188cfb6eeb4SYOSHIFUJI Hideaki #endif 2189cfb6eeb4SYOSHIFUJI Hideaki 21901a2449a8SChris Leech #ifdef CONFIG_NET_DMA 21911a2449a8SChris Leech /* Cleans up our sk_async_wait_queue */ 21921a2449a8SChris Leech __skb_queue_purge(&sk->sk_async_wait_queue); 21931a2449a8SChris Leech #endif 21941a2449a8SChris Leech 21951da177e4SLinus Torvalds /* Clean prequeue, it must be empty really */ 21961da177e4SLinus Torvalds __skb_queue_purge(&tp->ucopy.prequeue); 21971da177e4SLinus Torvalds 21981da177e4SLinus Torvalds /* Clean up a referenced TCP bind bucket. 
*/ 2199463c84b9SArnaldo Carvalho de Melo if (inet_csk(sk)->icsk_bind_hash) 2200ab1e0a13SArnaldo Carvalho de Melo inet_put_port(sk); 22011da177e4SLinus Torvalds 2202435cf559SWilliam Allen Simpson /* TCP Cookie Transactions */ 2203435cf559SWilliam Allen Simpson if (tp->cookie_values != NULL) { 2204435cf559SWilliam Allen Simpson kref_put(&tp->cookie_values->kref, 2205435cf559SWilliam Allen Simpson tcp_cookie_values_release); 2206435cf559SWilliam Allen Simpson tp->cookie_values = NULL; 2207435cf559SWilliam Allen Simpson } 2208168a8f58SJerry Chu BUG_ON(tp->fastopen_rsk != NULL); 2209435cf559SWilliam Allen Simpson 2210cf60af03SYuchung Cheng /* If socket is aborted during connect operation */ 2211cf60af03SYuchung Cheng tcp_free_fastopen_req(tp); 2212cf60af03SYuchung Cheng 2213180d8cd9SGlauber Costa sk_sockets_allocated_dec(sk); 2214d1a4c0b3SGlauber Costa sock_release_memcg(sk); 22151da177e4SLinus Torvalds } 22161da177e4SLinus Torvalds EXPORT_SYMBOL(tcp_v4_destroy_sock); 22171da177e4SLinus Torvalds 22181da177e4SLinus Torvalds #ifdef CONFIG_PROC_FS 22191da177e4SLinus Torvalds /* Proc filesystem TCP sock list dumping. */ 22201da177e4SLinus Torvalds 22213ab5aee7SEric Dumazet static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head) 22221da177e4SLinus Torvalds { 22233ab5aee7SEric Dumazet return hlist_nulls_empty(head) ? NULL : 22248feaf0c0SArnaldo Carvalho de Melo list_entry(head->first, struct inet_timewait_sock, tw_node); 22251da177e4SLinus Torvalds } 22261da177e4SLinus Torvalds 22278feaf0c0SArnaldo Carvalho de Melo static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw) 22281da177e4SLinus Torvalds { 22293ab5aee7SEric Dumazet return !is_a_nulls(tw->tw_node.next) ? 22303ab5aee7SEric Dumazet hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL; 22311da177e4SLinus Torvalds } 22321da177e4SLinus Torvalds 2233a8b690f9STom Herbert /* 2234a8b690f9STom Herbert * Get next listener socket follow cur. 
If cur is NULL, get first socket 2235a8b690f9STom Herbert * starting from bucket given in st->bucket; when st->bucket is zero the 2236a8b690f9STom Herbert * very first socket in the hash table is returned. 2237a8b690f9STom Herbert */ 22381da177e4SLinus Torvalds static void *listening_get_next(struct seq_file *seq, void *cur) 22391da177e4SLinus Torvalds { 2240463c84b9SArnaldo Carvalho de Melo struct inet_connection_sock *icsk; 2241c25eb3bfSEric Dumazet struct hlist_nulls_node *node; 22421da177e4SLinus Torvalds struct sock *sk = cur; 22435caea4eaSEric Dumazet struct inet_listen_hashbucket *ilb; 22441da177e4SLinus Torvalds struct tcp_iter_state *st = seq->private; 2245a4146b1bSDenis V. Lunev struct net *net = seq_file_net(seq); 22461da177e4SLinus Torvalds 22471da177e4SLinus Torvalds if (!sk) { 2248a8b690f9STom Herbert ilb = &tcp_hashinfo.listening_hash[st->bucket]; 22495caea4eaSEric Dumazet spin_lock_bh(&ilb->lock); 2250c25eb3bfSEric Dumazet sk = sk_nulls_head(&ilb->head); 2251a8b690f9STom Herbert st->offset = 0; 22521da177e4SLinus Torvalds goto get_sk; 22531da177e4SLinus Torvalds } 22545caea4eaSEric Dumazet ilb = &tcp_hashinfo.listening_hash[st->bucket]; 22551da177e4SLinus Torvalds ++st->num; 2256a8b690f9STom Herbert ++st->offset; 22571da177e4SLinus Torvalds 22581da177e4SLinus Torvalds if (st->state == TCP_SEQ_STATE_OPENREQ) { 225960236fddSArnaldo Carvalho de Melo struct request_sock *req = cur; 22601da177e4SLinus Torvalds 2261463c84b9SArnaldo Carvalho de Melo icsk = inet_csk(st->syn_wait_sk); 22621da177e4SLinus Torvalds req = req->dl_next; 22631da177e4SLinus Torvalds while (1) { 22641da177e4SLinus Torvalds while (req) { 2265bdccc4caSDaniel Lezcano if (req->rsk_ops->family == st->family) { 22661da177e4SLinus Torvalds cur = req; 22671da177e4SLinus Torvalds goto out; 22681da177e4SLinus Torvalds } 22691da177e4SLinus Torvalds req = req->dl_next; 22701da177e4SLinus Torvalds } 227172a3effaSEric Dumazet if (++st->sbucket >= 
icsk->icsk_accept_queue.listen_opt->nr_table_entries) 22721da177e4SLinus Torvalds break; 22731da177e4SLinus Torvalds get_req: 2274463c84b9SArnaldo Carvalho de Melo req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket]; 22751da177e4SLinus Torvalds } 22761bde5ac4SEric Dumazet sk = sk_nulls_next(st->syn_wait_sk); 22771da177e4SLinus Torvalds st->state = TCP_SEQ_STATE_LISTENING; 2278463c84b9SArnaldo Carvalho de Melo read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); 22791da177e4SLinus Torvalds } else { 2280463c84b9SArnaldo Carvalho de Melo icsk = inet_csk(sk); 2281463c84b9SArnaldo Carvalho de Melo read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); 2282463c84b9SArnaldo Carvalho de Melo if (reqsk_queue_len(&icsk->icsk_accept_queue)) 22831da177e4SLinus Torvalds goto start_req; 2284463c84b9SArnaldo Carvalho de Melo read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); 22851bde5ac4SEric Dumazet sk = sk_nulls_next(sk); 22861da177e4SLinus Torvalds } 22871da177e4SLinus Torvalds get_sk: 2288c25eb3bfSEric Dumazet sk_nulls_for_each_from(sk, node) { 22898475ef9fSPavel Emelyanov if (!net_eq(sock_net(sk), net)) 22908475ef9fSPavel Emelyanov continue; 22918475ef9fSPavel Emelyanov if (sk->sk_family == st->family) { 22921da177e4SLinus Torvalds cur = sk; 22931da177e4SLinus Torvalds goto out; 22941da177e4SLinus Torvalds } 2295463c84b9SArnaldo Carvalho de Melo icsk = inet_csk(sk); 2296463c84b9SArnaldo Carvalho de Melo read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); 2297463c84b9SArnaldo Carvalho de Melo if (reqsk_queue_len(&icsk->icsk_accept_queue)) { 22981da177e4SLinus Torvalds start_req: 22991da177e4SLinus Torvalds st->uid = sock_i_uid(sk); 23001da177e4SLinus Torvalds st->syn_wait_sk = sk; 23011da177e4SLinus Torvalds st->state = TCP_SEQ_STATE_OPENREQ; 23021da177e4SLinus Torvalds st->sbucket = 0; 23031da177e4SLinus Torvalds goto get_req; 23041da177e4SLinus Torvalds } 2305463c84b9SArnaldo Carvalho de Melo read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); 
23061da177e4SLinus Torvalds } 23075caea4eaSEric Dumazet spin_unlock_bh(&ilb->lock); 2308a8b690f9STom Herbert st->offset = 0; 23090f7ff927SArnaldo Carvalho de Melo if (++st->bucket < INET_LHTABLE_SIZE) { 23105caea4eaSEric Dumazet ilb = &tcp_hashinfo.listening_hash[st->bucket]; 23115caea4eaSEric Dumazet spin_lock_bh(&ilb->lock); 2312c25eb3bfSEric Dumazet sk = sk_nulls_head(&ilb->head); 23131da177e4SLinus Torvalds goto get_sk; 23141da177e4SLinus Torvalds } 23151da177e4SLinus Torvalds cur = NULL; 23161da177e4SLinus Torvalds out: 23171da177e4SLinus Torvalds return cur; 23181da177e4SLinus Torvalds } 23191da177e4SLinus Torvalds 23201da177e4SLinus Torvalds static void *listening_get_idx(struct seq_file *seq, loff_t *pos) 23211da177e4SLinus Torvalds { 2322a8b690f9STom Herbert struct tcp_iter_state *st = seq->private; 2323a8b690f9STom Herbert void *rc; 2324a8b690f9STom Herbert 2325a8b690f9STom Herbert st->bucket = 0; 2326a8b690f9STom Herbert st->offset = 0; 2327a8b690f9STom Herbert rc = listening_get_next(seq, NULL); 23281da177e4SLinus Torvalds 23291da177e4SLinus Torvalds while (rc && *pos) { 23301da177e4SLinus Torvalds rc = listening_get_next(seq, rc); 23311da177e4SLinus Torvalds --*pos; 23321da177e4SLinus Torvalds } 23331da177e4SLinus Torvalds return rc; 23341da177e4SLinus Torvalds } 23351da177e4SLinus Torvalds 2336a2a385d6SEric Dumazet static inline bool empty_bucket(struct tcp_iter_state *st) 23376eac5604SAndi Kleen { 23383ab5aee7SEric Dumazet return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) && 23393ab5aee7SEric Dumazet hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain); 23406eac5604SAndi Kleen } 23416eac5604SAndi Kleen 2342a8b690f9STom Herbert /* 2343a8b690f9STom Herbert * Get first established socket starting from bucket given in st->bucket. 2344a8b690f9STom Herbert * If st->bucket is zero, the very first socket in the hash is returned. 
2345a8b690f9STom Herbert */ 23461da177e4SLinus Torvalds static void *established_get_first(struct seq_file *seq) 23471da177e4SLinus Torvalds { 23481da177e4SLinus Torvalds struct tcp_iter_state *st = seq->private; 2349a4146b1bSDenis V. Lunev struct net *net = seq_file_net(seq); 23501da177e4SLinus Torvalds void *rc = NULL; 23511da177e4SLinus Torvalds 2352a8b690f9STom Herbert st->offset = 0; 2353a8b690f9STom Herbert for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) { 23541da177e4SLinus Torvalds struct sock *sk; 23553ab5aee7SEric Dumazet struct hlist_nulls_node *node; 23568feaf0c0SArnaldo Carvalho de Melo struct inet_timewait_sock *tw; 23579db66bdcSEric Dumazet spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket); 23581da177e4SLinus Torvalds 23596eac5604SAndi Kleen /* Lockless fast path for the common case of empty buckets */ 23606eac5604SAndi Kleen if (empty_bucket(st)) 23616eac5604SAndi Kleen continue; 23626eac5604SAndi Kleen 23639db66bdcSEric Dumazet spin_lock_bh(lock); 23643ab5aee7SEric Dumazet sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { 2365f40c8174SDaniel Lezcano if (sk->sk_family != st->family || 2366878628fbSYOSHIFUJI Hideaki !net_eq(sock_net(sk), net)) { 23671da177e4SLinus Torvalds continue; 23681da177e4SLinus Torvalds } 23691da177e4SLinus Torvalds rc = sk; 23701da177e4SLinus Torvalds goto out; 23711da177e4SLinus Torvalds } 23721da177e4SLinus Torvalds st->state = TCP_SEQ_STATE_TIME_WAIT; 23738feaf0c0SArnaldo Carvalho de Melo inet_twsk_for_each(tw, node, 2374dbca9b27SEric Dumazet &tcp_hashinfo.ehash[st->bucket].twchain) { 237528518fc1SPavel Emelyanov if (tw->tw_family != st->family || 2376878628fbSYOSHIFUJI Hideaki !net_eq(twsk_net(tw), net)) { 23771da177e4SLinus Torvalds continue; 23781da177e4SLinus Torvalds } 23791da177e4SLinus Torvalds rc = tw; 23801da177e4SLinus Torvalds goto out; 23811da177e4SLinus Torvalds } 23829db66bdcSEric Dumazet spin_unlock_bh(lock); 23831da177e4SLinus Torvalds st->state = 
TCP_SEQ_STATE_ESTABLISHED; 23841da177e4SLinus Torvalds } 23851da177e4SLinus Torvalds out: 23861da177e4SLinus Torvalds return rc; 23871da177e4SLinus Torvalds } 23881da177e4SLinus Torvalds 23891da177e4SLinus Torvalds static void *established_get_next(struct seq_file *seq, void *cur) 23901da177e4SLinus Torvalds { 23911da177e4SLinus Torvalds struct sock *sk = cur; 23928feaf0c0SArnaldo Carvalho de Melo struct inet_timewait_sock *tw; 23933ab5aee7SEric Dumazet struct hlist_nulls_node *node; 23941da177e4SLinus Torvalds struct tcp_iter_state *st = seq->private; 2395a4146b1bSDenis V. Lunev struct net *net = seq_file_net(seq); 23961da177e4SLinus Torvalds 23971da177e4SLinus Torvalds ++st->num; 2398a8b690f9STom Herbert ++st->offset; 23991da177e4SLinus Torvalds 24001da177e4SLinus Torvalds if (st->state == TCP_SEQ_STATE_TIME_WAIT) { 24011da177e4SLinus Torvalds tw = cur; 24021da177e4SLinus Torvalds tw = tw_next(tw); 24031da177e4SLinus Torvalds get_tw: 2404878628fbSYOSHIFUJI Hideaki while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) { 24051da177e4SLinus Torvalds tw = tw_next(tw); 24061da177e4SLinus Torvalds } 24071da177e4SLinus Torvalds if (tw) { 24081da177e4SLinus Torvalds cur = tw; 24091da177e4SLinus Torvalds goto out; 24101da177e4SLinus Torvalds } 24119db66bdcSEric Dumazet spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); 24121da177e4SLinus Torvalds st->state = TCP_SEQ_STATE_ESTABLISHED; 24131da177e4SLinus Torvalds 24146eac5604SAndi Kleen /* Look for next non empty bucket */ 2415a8b690f9STom Herbert st->offset = 0; 2416f373b53bSEric Dumazet while (++st->bucket <= tcp_hashinfo.ehash_mask && 24176eac5604SAndi Kleen empty_bucket(st)) 24186eac5604SAndi Kleen ; 2419f373b53bSEric Dumazet if (st->bucket > tcp_hashinfo.ehash_mask) 24206eac5604SAndi Kleen return NULL; 24216eac5604SAndi Kleen 24229db66bdcSEric Dumazet spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); 24233ab5aee7SEric Dumazet sk = 
sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain); 24241da177e4SLinus Torvalds } else 24253ab5aee7SEric Dumazet sk = sk_nulls_next(sk); 24261da177e4SLinus Torvalds 24273ab5aee7SEric Dumazet sk_nulls_for_each_from(sk, node) { 2428878628fbSYOSHIFUJI Hideaki if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) 24291da177e4SLinus Torvalds goto found; 24301da177e4SLinus Torvalds } 24311da177e4SLinus Torvalds 24321da177e4SLinus Torvalds st->state = TCP_SEQ_STATE_TIME_WAIT; 2433dbca9b27SEric Dumazet tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain); 24341da177e4SLinus Torvalds goto get_tw; 24351da177e4SLinus Torvalds found: 24361da177e4SLinus Torvalds cur = sk; 24371da177e4SLinus Torvalds out: 24381da177e4SLinus Torvalds return cur; 24391da177e4SLinus Torvalds } 24401da177e4SLinus Torvalds 24411da177e4SLinus Torvalds static void *established_get_idx(struct seq_file *seq, loff_t pos) 24421da177e4SLinus Torvalds { 2443a8b690f9STom Herbert struct tcp_iter_state *st = seq->private; 2444a8b690f9STom Herbert void *rc; 2445a8b690f9STom Herbert 2446a8b690f9STom Herbert st->bucket = 0; 2447a8b690f9STom Herbert rc = established_get_first(seq); 24481da177e4SLinus Torvalds 24491da177e4SLinus Torvalds while (rc && pos) { 24501da177e4SLinus Torvalds rc = established_get_next(seq, rc); 24511da177e4SLinus Torvalds --pos; 24521da177e4SLinus Torvalds } 24531da177e4SLinus Torvalds return rc; 24541da177e4SLinus Torvalds } 24551da177e4SLinus Torvalds 24561da177e4SLinus Torvalds static void *tcp_get_idx(struct seq_file *seq, loff_t pos) 24571da177e4SLinus Torvalds { 24581da177e4SLinus Torvalds void *rc; 24591da177e4SLinus Torvalds struct tcp_iter_state *st = seq->private; 24601da177e4SLinus Torvalds 24611da177e4SLinus Torvalds st->state = TCP_SEQ_STATE_LISTENING; 24621da177e4SLinus Torvalds rc = listening_get_idx(seq, &pos); 24631da177e4SLinus Torvalds 24641da177e4SLinus Torvalds if (!rc) { 24651da177e4SLinus Torvalds st->state = TCP_SEQ_STATE_ESTABLISHED; 24661da177e4SLinus 
Torvalds rc = established_get_idx(seq, pos); 24671da177e4SLinus Torvalds } 24681da177e4SLinus Torvalds 24691da177e4SLinus Torvalds return rc; 24701da177e4SLinus Torvalds } 24711da177e4SLinus Torvalds 2472a8b690f9STom Herbert static void *tcp_seek_last_pos(struct seq_file *seq) 2473a8b690f9STom Herbert { 2474a8b690f9STom Herbert struct tcp_iter_state *st = seq->private; 2475a8b690f9STom Herbert int offset = st->offset; 2476a8b690f9STom Herbert int orig_num = st->num; 2477a8b690f9STom Herbert void *rc = NULL; 2478a8b690f9STom Herbert 2479a8b690f9STom Herbert switch (st->state) { 2480a8b690f9STom Herbert case TCP_SEQ_STATE_OPENREQ: 2481a8b690f9STom Herbert case TCP_SEQ_STATE_LISTENING: 2482a8b690f9STom Herbert if (st->bucket >= INET_LHTABLE_SIZE) 2483a8b690f9STom Herbert break; 2484a8b690f9STom Herbert st->state = TCP_SEQ_STATE_LISTENING; 2485a8b690f9STom Herbert rc = listening_get_next(seq, NULL); 2486a8b690f9STom Herbert while (offset-- && rc) 2487a8b690f9STom Herbert rc = listening_get_next(seq, rc); 2488a8b690f9STom Herbert if (rc) 2489a8b690f9STom Herbert break; 2490a8b690f9STom Herbert st->bucket = 0; 2491a8b690f9STom Herbert /* Fallthrough */ 2492a8b690f9STom Herbert case TCP_SEQ_STATE_ESTABLISHED: 2493a8b690f9STom Herbert case TCP_SEQ_STATE_TIME_WAIT: 2494a8b690f9STom Herbert st->state = TCP_SEQ_STATE_ESTABLISHED; 2495a8b690f9STom Herbert if (st->bucket > tcp_hashinfo.ehash_mask) 2496a8b690f9STom Herbert break; 2497a8b690f9STom Herbert rc = established_get_first(seq); 2498a8b690f9STom Herbert while (offset-- && rc) 2499a8b690f9STom Herbert rc = established_get_next(seq, rc); 2500a8b690f9STom Herbert } 2501a8b690f9STom Herbert 2502a8b690f9STom Herbert st->num = orig_num; 2503a8b690f9STom Herbert 2504a8b690f9STom Herbert return rc; 2505a8b690f9STom Herbert } 2506a8b690f9STom Herbert 25071da177e4SLinus Torvalds static void *tcp_seq_start(struct seq_file *seq, loff_t *pos) 25081da177e4SLinus Torvalds { 25091da177e4SLinus Torvalds struct tcp_iter_state *st = 
seq->private; 2510a8b690f9STom Herbert void *rc; 2511a8b690f9STom Herbert 2512a8b690f9STom Herbert if (*pos && *pos == st->last_pos) { 2513a8b690f9STom Herbert rc = tcp_seek_last_pos(seq); 2514a8b690f9STom Herbert if (rc) 2515a8b690f9STom Herbert goto out; 2516a8b690f9STom Herbert } 2517a8b690f9STom Herbert 25181da177e4SLinus Torvalds st->state = TCP_SEQ_STATE_LISTENING; 25191da177e4SLinus Torvalds st->num = 0; 2520a8b690f9STom Herbert st->bucket = 0; 2521a8b690f9STom Herbert st->offset = 0; 2522a8b690f9STom Herbert rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; 2523a8b690f9STom Herbert 2524a8b690f9STom Herbert out: 2525a8b690f9STom Herbert st->last_pos = *pos; 2526a8b690f9STom Herbert return rc; 25271da177e4SLinus Torvalds } 25281da177e4SLinus Torvalds 25291da177e4SLinus Torvalds static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) 25301da177e4SLinus Torvalds { 2531a8b690f9STom Herbert struct tcp_iter_state *st = seq->private; 25321da177e4SLinus Torvalds void *rc = NULL; 25331da177e4SLinus Torvalds 25341da177e4SLinus Torvalds if (v == SEQ_START_TOKEN) { 25351da177e4SLinus Torvalds rc = tcp_get_idx(seq, 0); 25361da177e4SLinus Torvalds goto out; 25371da177e4SLinus Torvalds } 25381da177e4SLinus Torvalds 25391da177e4SLinus Torvalds switch (st->state) { 25401da177e4SLinus Torvalds case TCP_SEQ_STATE_OPENREQ: 25411da177e4SLinus Torvalds case TCP_SEQ_STATE_LISTENING: 25421da177e4SLinus Torvalds rc = listening_get_next(seq, v); 25431da177e4SLinus Torvalds if (!rc) { 25441da177e4SLinus Torvalds st->state = TCP_SEQ_STATE_ESTABLISHED; 2545a8b690f9STom Herbert st->bucket = 0; 2546a8b690f9STom Herbert st->offset = 0; 25471da177e4SLinus Torvalds rc = established_get_first(seq); 25481da177e4SLinus Torvalds } 25491da177e4SLinus Torvalds break; 25501da177e4SLinus Torvalds case TCP_SEQ_STATE_ESTABLISHED: 25511da177e4SLinus Torvalds case TCP_SEQ_STATE_TIME_WAIT: 25521da177e4SLinus Torvalds rc = established_get_next(seq, v); 25531da177e4SLinus Torvalds 
break; 25541da177e4SLinus Torvalds } 25551da177e4SLinus Torvalds out: 25561da177e4SLinus Torvalds ++*pos; 2557a8b690f9STom Herbert st->last_pos = *pos; 25581da177e4SLinus Torvalds return rc; 25591da177e4SLinus Torvalds } 25601da177e4SLinus Torvalds 25611da177e4SLinus Torvalds static void tcp_seq_stop(struct seq_file *seq, void *v) 25621da177e4SLinus Torvalds { 25631da177e4SLinus Torvalds struct tcp_iter_state *st = seq->private; 25641da177e4SLinus Torvalds 25651da177e4SLinus Torvalds switch (st->state) { 25661da177e4SLinus Torvalds case TCP_SEQ_STATE_OPENREQ: 25671da177e4SLinus Torvalds if (v) { 2568463c84b9SArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk); 2569463c84b9SArnaldo Carvalho de Melo read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); 25701da177e4SLinus Torvalds } 25711da177e4SLinus Torvalds case TCP_SEQ_STATE_LISTENING: 25721da177e4SLinus Torvalds if (v != SEQ_START_TOKEN) 25735caea4eaSEric Dumazet spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock); 25741da177e4SLinus Torvalds break; 25751da177e4SLinus Torvalds case TCP_SEQ_STATE_TIME_WAIT: 25761da177e4SLinus Torvalds case TCP_SEQ_STATE_ESTABLISHED: 25771da177e4SLinus Torvalds if (v) 25789db66bdcSEric Dumazet spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); 25791da177e4SLinus Torvalds break; 25801da177e4SLinus Torvalds } 25811da177e4SLinus Torvalds } 25821da177e4SLinus Torvalds 258373cb88ecSArjan van de Ven int tcp_seq_open(struct inode *inode, struct file *file) 25841da177e4SLinus Torvalds { 25851da177e4SLinus Torvalds struct tcp_seq_afinfo *afinfo = PDE(inode)->data; 25861da177e4SLinus Torvalds struct tcp_iter_state *s; 258752d6f3f1SDenis V. Lunev int err; 25881da177e4SLinus Torvalds 258952d6f3f1SDenis V. Lunev err = seq_open_net(inode, file, &afinfo->seq_ops, 259052d6f3f1SDenis V. Lunev sizeof(struct tcp_iter_state)); 259152d6f3f1SDenis V. Lunev if (err < 0) 259252d6f3f1SDenis V. 
Lunev return err; 2593f40c8174SDaniel Lezcano 259452d6f3f1SDenis V. Lunev s = ((struct seq_file *)file->private_data)->private; 25951da177e4SLinus Torvalds s->family = afinfo->family; 2596a8b690f9STom Herbert s->last_pos = 0; 2597f40c8174SDaniel Lezcano return 0; 2598f40c8174SDaniel Lezcano } 259973cb88ecSArjan van de Ven EXPORT_SYMBOL(tcp_seq_open); 2600f40c8174SDaniel Lezcano 26016f8b13bcSDaniel Lezcano int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo) 26021da177e4SLinus Torvalds { 26031da177e4SLinus Torvalds int rc = 0; 26041da177e4SLinus Torvalds struct proc_dir_entry *p; 26051da177e4SLinus Torvalds 26069427c4b3SDenis V. Lunev afinfo->seq_ops.start = tcp_seq_start; 26079427c4b3SDenis V. Lunev afinfo->seq_ops.next = tcp_seq_next; 26089427c4b3SDenis V. Lunev afinfo->seq_ops.stop = tcp_seq_stop; 26099427c4b3SDenis V. Lunev 261084841c3cSDenis V. Lunev p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net, 261173cb88ecSArjan van de Ven afinfo->seq_fops, afinfo); 261284841c3cSDenis V. Lunev if (!p) 26131da177e4SLinus Torvalds rc = -ENOMEM; 26141da177e4SLinus Torvalds return rc; 26151da177e4SLinus Torvalds } 26164bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_proc_register); 26171da177e4SLinus Torvalds 26186f8b13bcSDaniel Lezcano void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo) 26191da177e4SLinus Torvalds { 26206f8b13bcSDaniel Lezcano proc_net_remove(net, afinfo->name); 26211da177e4SLinus Torvalds } 26224bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_proc_unregister); 26231da177e4SLinus Torvalds 2624cf533ea5SEric Dumazet static void get_openreq4(const struct sock *sk, const struct request_sock *req, 2625a7cb5a49SEric W. 
Biederman struct seq_file *f, int i, kuid_t uid, int *len) 26261da177e4SLinus Torvalds { 26272e6599cbSArnaldo Carvalho de Melo const struct inet_request_sock *ireq = inet_rsk(req); 2628a399a805SEric Dumazet long delta = req->expires - jiffies; 26291da177e4SLinus Torvalds 26305e659e4cSPavel Emelyanov seq_printf(f, "%4d: %08X:%04X %08X:%04X" 263171338aa7SDan Rosenberg " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %pK%n", 26321da177e4SLinus Torvalds i, 26332e6599cbSArnaldo Carvalho de Melo ireq->loc_addr, 2634c720c7e8SEric Dumazet ntohs(inet_sk(sk)->inet_sport), 26352e6599cbSArnaldo Carvalho de Melo ireq->rmt_addr, 26362e6599cbSArnaldo Carvalho de Melo ntohs(ireq->rmt_port), 26371da177e4SLinus Torvalds TCP_SYN_RECV, 26381da177e4SLinus Torvalds 0, 0, /* could print option size, but that is af dependent. */ 26391da177e4SLinus Torvalds 1, /* timers active (only the expire timer) */ 2640a399a805SEric Dumazet jiffies_delta_to_clock_t(delta), 26411da177e4SLinus Torvalds req->retrans, 2642a7cb5a49SEric W. 
Biederman from_kuid_munged(seq_user_ns(f), uid), 26431da177e4SLinus Torvalds 0, /* non standard timer */ 26441da177e4SLinus Torvalds 0, /* open_requests have no inode */ 26451da177e4SLinus Torvalds atomic_read(&sk->sk_refcnt), 26465e659e4cSPavel Emelyanov req, 26475e659e4cSPavel Emelyanov len); 26481da177e4SLinus Torvalds } 26491da177e4SLinus Torvalds 26505e659e4cSPavel Emelyanov static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len) 26511da177e4SLinus Torvalds { 26521da177e4SLinus Torvalds int timer_active; 26531da177e4SLinus Torvalds unsigned long timer_expires; 2654cf533ea5SEric Dumazet const struct tcp_sock *tp = tcp_sk(sk); 2655cf4c6bf8SIlpo Järvinen const struct inet_connection_sock *icsk = inet_csk(sk); 2656cf533ea5SEric Dumazet const struct inet_sock *inet = inet_sk(sk); 2657168a8f58SJerry Chu struct fastopen_queue *fastopenq = icsk->icsk_accept_queue.fastopenq; 2658c720c7e8SEric Dumazet __be32 dest = inet->inet_daddr; 2659c720c7e8SEric Dumazet __be32 src = inet->inet_rcv_saddr; 2660c720c7e8SEric Dumazet __u16 destp = ntohs(inet->inet_dport); 2661c720c7e8SEric Dumazet __u16 srcp = ntohs(inet->inet_sport); 266249d09007SEric Dumazet int rx_queue; 26631da177e4SLinus Torvalds 2664463c84b9SArnaldo Carvalho de Melo if (icsk->icsk_pending == ICSK_TIME_RETRANS) { 26651da177e4SLinus Torvalds timer_active = 1; 2666463c84b9SArnaldo Carvalho de Melo timer_expires = icsk->icsk_timeout; 2667463c84b9SArnaldo Carvalho de Melo } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { 26681da177e4SLinus Torvalds timer_active = 4; 2669463c84b9SArnaldo Carvalho de Melo timer_expires = icsk->icsk_timeout; 2670cf4c6bf8SIlpo Järvinen } else if (timer_pending(&sk->sk_timer)) { 26711da177e4SLinus Torvalds timer_active = 2; 2672cf4c6bf8SIlpo Järvinen timer_expires = sk->sk_timer.expires; 26731da177e4SLinus Torvalds } else { 26741da177e4SLinus Torvalds timer_active = 0; 26751da177e4SLinus Torvalds timer_expires = jiffies; 26761da177e4SLinus Torvalds } 
26771da177e4SLinus Torvalds 267849d09007SEric Dumazet if (sk->sk_state == TCP_LISTEN) 267949d09007SEric Dumazet rx_queue = sk->sk_ack_backlog; 268049d09007SEric Dumazet else 268149d09007SEric Dumazet /* 268249d09007SEric Dumazet * because we dont lock socket, we might find a transient negative value 268349d09007SEric Dumazet */ 268449d09007SEric Dumazet rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0); 268549d09007SEric Dumazet 26865e659e4cSPavel Emelyanov seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX " 268771338aa7SDan Rosenberg "%08X %5d %8d %lu %d %pK %lu %lu %u %u %d%n", 2688cf4c6bf8SIlpo Järvinen i, src, srcp, dest, destp, sk->sk_state, 268947da8ee6SSridhar Samudrala tp->write_seq - tp->snd_una, 269049d09007SEric Dumazet rx_queue, 26911da177e4SLinus Torvalds timer_active, 2692a399a805SEric Dumazet jiffies_delta_to_clock_t(timer_expires - jiffies), 2693463c84b9SArnaldo Carvalho de Melo icsk->icsk_retransmits, 2694a7cb5a49SEric W. Biederman from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)), 26956687e988SArnaldo Carvalho de Melo icsk->icsk_probes_out, 2696cf4c6bf8SIlpo Järvinen sock_i_ino(sk), 2697cf4c6bf8SIlpo Järvinen atomic_read(&sk->sk_refcnt), sk, 26987be87351SStephen Hemminger jiffies_to_clock_t(icsk->icsk_rto), 26997be87351SStephen Hemminger jiffies_to_clock_t(icsk->icsk_ack.ato), 2700463c84b9SArnaldo Carvalho de Melo (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, 27011da177e4SLinus Torvalds tp->snd_cwnd, 2702168a8f58SJerry Chu sk->sk_state == TCP_LISTEN ? 2703168a8f58SJerry Chu (fastopenq ? fastopenq->max_qlen : 0) : 2704168a8f58SJerry Chu (tcp_in_initial_slowstart(tp) ? 
-1 : tp->snd_ssthresh), 27055e659e4cSPavel Emelyanov len); 27061da177e4SLinus Torvalds } 27071da177e4SLinus Torvalds 2708cf533ea5SEric Dumazet static void get_timewait4_sock(const struct inet_timewait_sock *tw, 27095e659e4cSPavel Emelyanov struct seq_file *f, int i, int *len) 27101da177e4SLinus Torvalds { 271123f33c2dSAl Viro __be32 dest, src; 27121da177e4SLinus Torvalds __u16 destp, srcp; 2713a399a805SEric Dumazet long delta = tw->tw_ttd - jiffies; 27141da177e4SLinus Torvalds 27151da177e4SLinus Torvalds dest = tw->tw_daddr; 27161da177e4SLinus Torvalds src = tw->tw_rcv_saddr; 27171da177e4SLinus Torvalds destp = ntohs(tw->tw_dport); 27181da177e4SLinus Torvalds srcp = ntohs(tw->tw_sport); 27191da177e4SLinus Torvalds 27205e659e4cSPavel Emelyanov seq_printf(f, "%4d: %08X:%04X %08X:%04X" 272171338aa7SDan Rosenberg " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK%n", 27221da177e4SLinus Torvalds i, src, srcp, dest, destp, tw->tw_substate, 0, 0, 2723a399a805SEric Dumazet 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0, 27245e659e4cSPavel Emelyanov atomic_read(&tw->tw_refcnt), tw, len); 27251da177e4SLinus Torvalds } 27261da177e4SLinus Torvalds 27271da177e4SLinus Torvalds #define TMPSZ 150 27281da177e4SLinus Torvalds 27291da177e4SLinus Torvalds static int tcp4_seq_show(struct seq_file *seq, void *v) 27301da177e4SLinus Torvalds { 27311da177e4SLinus Torvalds struct tcp_iter_state *st; 27325e659e4cSPavel Emelyanov int len; 27331da177e4SLinus Torvalds 27341da177e4SLinus Torvalds if (v == SEQ_START_TOKEN) { 27351da177e4SLinus Torvalds seq_printf(seq, "%-*s\n", TMPSZ - 1, 27361da177e4SLinus Torvalds " sl local_address rem_address st tx_queue " 27371da177e4SLinus Torvalds "rx_queue tr tm->when retrnsmt uid timeout " 27381da177e4SLinus Torvalds "inode"); 27391da177e4SLinus Torvalds goto out; 27401da177e4SLinus Torvalds } 27411da177e4SLinus Torvalds st = seq->private; 27421da177e4SLinus Torvalds 27431da177e4SLinus Torvalds switch (st->state) { 27441da177e4SLinus Torvalds case 
TCP_SEQ_STATE_LISTENING: 27451da177e4SLinus Torvalds case TCP_SEQ_STATE_ESTABLISHED: 27465e659e4cSPavel Emelyanov get_tcp4_sock(v, seq, st->num, &len); 27471da177e4SLinus Torvalds break; 27481da177e4SLinus Torvalds case TCP_SEQ_STATE_OPENREQ: 27495e659e4cSPavel Emelyanov get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len); 27501da177e4SLinus Torvalds break; 27511da177e4SLinus Torvalds case TCP_SEQ_STATE_TIME_WAIT: 27525e659e4cSPavel Emelyanov get_timewait4_sock(v, seq, st->num, &len); 27531da177e4SLinus Torvalds break; 27541da177e4SLinus Torvalds } 27555e659e4cSPavel Emelyanov seq_printf(seq, "%*s\n", TMPSZ - 1 - len, ""); 27561da177e4SLinus Torvalds out: 27571da177e4SLinus Torvalds return 0; 27581da177e4SLinus Torvalds } 27591da177e4SLinus Torvalds 276073cb88ecSArjan van de Ven static const struct file_operations tcp_afinfo_seq_fops = { 276173cb88ecSArjan van de Ven .owner = THIS_MODULE, 276273cb88ecSArjan van de Ven .open = tcp_seq_open, 276373cb88ecSArjan van de Ven .read = seq_read, 276473cb88ecSArjan van de Ven .llseek = seq_lseek, 276573cb88ecSArjan van de Ven .release = seq_release_net 276673cb88ecSArjan van de Ven }; 276773cb88ecSArjan van de Ven 27681da177e4SLinus Torvalds static struct tcp_seq_afinfo tcp4_seq_afinfo = { 27691da177e4SLinus Torvalds .name = "tcp", 27701da177e4SLinus Torvalds .family = AF_INET, 277173cb88ecSArjan van de Ven .seq_fops = &tcp_afinfo_seq_fops, 27729427c4b3SDenis V. Lunev .seq_ops = { 27739427c4b3SDenis V. Lunev .show = tcp4_seq_show, 27749427c4b3SDenis V. 
Lunev }, 27751da177e4SLinus Torvalds }; 27761da177e4SLinus Torvalds 27772c8c1e72SAlexey Dobriyan static int __net_init tcp4_proc_init_net(struct net *net) 2778757764f6SPavel Emelyanov { 2779757764f6SPavel Emelyanov return tcp_proc_register(net, &tcp4_seq_afinfo); 2780757764f6SPavel Emelyanov } 2781757764f6SPavel Emelyanov 27822c8c1e72SAlexey Dobriyan static void __net_exit tcp4_proc_exit_net(struct net *net) 2783757764f6SPavel Emelyanov { 2784757764f6SPavel Emelyanov tcp_proc_unregister(net, &tcp4_seq_afinfo); 2785757764f6SPavel Emelyanov } 2786757764f6SPavel Emelyanov 2787757764f6SPavel Emelyanov static struct pernet_operations tcp4_net_ops = { 2788757764f6SPavel Emelyanov .init = tcp4_proc_init_net, 2789757764f6SPavel Emelyanov .exit = tcp4_proc_exit_net, 2790757764f6SPavel Emelyanov }; 2791757764f6SPavel Emelyanov 27921da177e4SLinus Torvalds int __init tcp4_proc_init(void) 27931da177e4SLinus Torvalds { 2794757764f6SPavel Emelyanov return register_pernet_subsys(&tcp4_net_ops); 27951da177e4SLinus Torvalds } 27961da177e4SLinus Torvalds 27971da177e4SLinus Torvalds void tcp4_proc_exit(void) 27981da177e4SLinus Torvalds { 2799757764f6SPavel Emelyanov unregister_pernet_subsys(&tcp4_net_ops); 28001da177e4SLinus Torvalds } 28011da177e4SLinus Torvalds #endif /* CONFIG_PROC_FS */ 28021da177e4SLinus Torvalds 2803bf296b12SHerbert Xu struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb) 2804bf296b12SHerbert Xu { 2805b71d1d42SEric Dumazet const struct iphdr *iph = skb_gro_network_header(skb); 2806861b6501SEric Dumazet __wsum wsum; 2807861b6501SEric Dumazet __sum16 sum; 2808bf296b12SHerbert Xu 2809bf296b12SHerbert Xu switch (skb->ip_summed) { 2810bf296b12SHerbert Xu case CHECKSUM_COMPLETE: 281186911732SHerbert Xu if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr, 2812bf296b12SHerbert Xu skb->csum)) { 2813bf296b12SHerbert Xu skb->ip_summed = CHECKSUM_UNNECESSARY; 2814bf296b12SHerbert Xu break; 2815bf296b12SHerbert Xu } 2816861b6501SEric Dumazet 
flush: 2817bf296b12SHerbert Xu NAPI_GRO_CB(skb)->flush = 1; 2818bf296b12SHerbert Xu return NULL; 2819861b6501SEric Dumazet 2820861b6501SEric Dumazet case CHECKSUM_NONE: 2821861b6501SEric Dumazet wsum = csum_tcpudp_nofold(iph->saddr, iph->daddr, 2822861b6501SEric Dumazet skb_gro_len(skb), IPPROTO_TCP, 0); 2823861b6501SEric Dumazet sum = csum_fold(skb_checksum(skb, 2824861b6501SEric Dumazet skb_gro_offset(skb), 2825861b6501SEric Dumazet skb_gro_len(skb), 2826861b6501SEric Dumazet wsum)); 2827861b6501SEric Dumazet if (sum) 2828861b6501SEric Dumazet goto flush; 2829861b6501SEric Dumazet 2830861b6501SEric Dumazet skb->ip_summed = CHECKSUM_UNNECESSARY; 2831861b6501SEric Dumazet break; 2832bf296b12SHerbert Xu } 2833bf296b12SHerbert Xu 2834bf296b12SHerbert Xu return tcp_gro_receive(head, skb); 2835bf296b12SHerbert Xu } 2836bf296b12SHerbert Xu 2837bf296b12SHerbert Xu int tcp4_gro_complete(struct sk_buff *skb) 2838bf296b12SHerbert Xu { 2839b71d1d42SEric Dumazet const struct iphdr *iph = ip_hdr(skb); 2840bf296b12SHerbert Xu struct tcphdr *th = tcp_hdr(skb); 2841bf296b12SHerbert Xu 2842bf296b12SHerbert Xu th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb), 2843bf296b12SHerbert Xu iph->saddr, iph->daddr, 0); 2844bf296b12SHerbert Xu skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; 2845bf296b12SHerbert Xu 2846bf296b12SHerbert Xu return tcp_gro_complete(skb); 2847bf296b12SHerbert Xu } 2848bf296b12SHerbert Xu 28491da177e4SLinus Torvalds struct proto tcp_prot = { 28501da177e4SLinus Torvalds .name = "TCP", 28511da177e4SLinus Torvalds .owner = THIS_MODULE, 28521da177e4SLinus Torvalds .close = tcp_close, 28531da177e4SLinus Torvalds .connect = tcp_v4_connect, 28541da177e4SLinus Torvalds .disconnect = tcp_disconnect, 2855463c84b9SArnaldo Carvalho de Melo .accept = inet_csk_accept, 28561da177e4SLinus Torvalds .ioctl = tcp_ioctl, 28571da177e4SLinus Torvalds .init = tcp_v4_init_sock, 28581da177e4SLinus Torvalds .destroy = tcp_v4_destroy_sock, 28591da177e4SLinus Torvalds .shutdown = 
tcp_shutdown, 28601da177e4SLinus Torvalds .setsockopt = tcp_setsockopt, 28611da177e4SLinus Torvalds .getsockopt = tcp_getsockopt, 28621da177e4SLinus Torvalds .recvmsg = tcp_recvmsg, 28637ba42910SChangli Gao .sendmsg = tcp_sendmsg, 28647ba42910SChangli Gao .sendpage = tcp_sendpage, 28651da177e4SLinus Torvalds .backlog_rcv = tcp_v4_do_rcv, 286646d3ceabSEric Dumazet .release_cb = tcp_release_cb, 2867563d34d0SEric Dumazet .mtu_reduced = tcp_v4_mtu_reduced, 2868ab1e0a13SArnaldo Carvalho de Melo .hash = inet_hash, 2869ab1e0a13SArnaldo Carvalho de Melo .unhash = inet_unhash, 2870ab1e0a13SArnaldo Carvalho de Melo .get_port = inet_csk_get_port, 28711da177e4SLinus Torvalds .enter_memory_pressure = tcp_enter_memory_pressure, 28721da177e4SLinus Torvalds .sockets_allocated = &tcp_sockets_allocated, 28730a5578cfSArnaldo Carvalho de Melo .orphan_count = &tcp_orphan_count, 28741da177e4SLinus Torvalds .memory_allocated = &tcp_memory_allocated, 28751da177e4SLinus Torvalds .memory_pressure = &tcp_memory_pressure, 28761da177e4SLinus Torvalds .sysctl_wmem = sysctl_tcp_wmem, 28771da177e4SLinus Torvalds .sysctl_rmem = sysctl_tcp_rmem, 28781da177e4SLinus Torvalds .max_header = MAX_TCP_HEADER, 28791da177e4SLinus Torvalds .obj_size = sizeof(struct tcp_sock), 28803ab5aee7SEric Dumazet .slab_flags = SLAB_DESTROY_BY_RCU, 28816d6ee43eSArnaldo Carvalho de Melo .twsk_prot = &tcp_timewait_sock_ops, 288260236fddSArnaldo Carvalho de Melo .rsk_prot = &tcp_request_sock_ops, 288339d8cda7SPavel Emelyanov .h.hashinfo = &tcp_hashinfo, 28847ba42910SChangli Gao .no_autobind = true, 2885543d9cfeSArnaldo Carvalho de Melo #ifdef CONFIG_COMPAT 2886543d9cfeSArnaldo Carvalho de Melo .compat_setsockopt = compat_tcp_setsockopt, 2887543d9cfeSArnaldo Carvalho de Melo .compat_getsockopt = compat_tcp_getsockopt, 2888543d9cfeSArnaldo Carvalho de Melo #endif 2889c255a458SAndrew Morton #ifdef CONFIG_MEMCG_KMEM 2890d1a4c0b3SGlauber Costa .init_cgroup = tcp_init_cgroup, 2891d1a4c0b3SGlauber Costa .destroy_cgroup = 
tcp_destroy_cgroup, 2892d1a4c0b3SGlauber Costa .proto_cgroup = tcp_proto_cgroup, 2893d1a4c0b3SGlauber Costa #endif 28941da177e4SLinus Torvalds }; 28954bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_prot); 28961da177e4SLinus Torvalds 2897046ee902SDenis V. Lunev static int __net_init tcp_sk_init(struct net *net) 2898046ee902SDenis V. Lunev { 2899be9f4a44SEric Dumazet return 0; 2900046ee902SDenis V. Lunev } 2901046ee902SDenis V. Lunev 2902046ee902SDenis V. Lunev static void __net_exit tcp_sk_exit(struct net *net) 2903046ee902SDenis V. Lunev { 2904b099ce26SEric W. Biederman } 2905b099ce26SEric W. Biederman 2906b099ce26SEric W. Biederman static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list) 2907b099ce26SEric W. Biederman { 2908b099ce26SEric W. Biederman inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET); 2909046ee902SDenis V. Lunev } 2910046ee902SDenis V. Lunev 2911046ee902SDenis V. Lunev static struct pernet_operations __net_initdata tcp_sk_ops = { 2912046ee902SDenis V. Lunev .init = tcp_sk_init, 2913046ee902SDenis V. Lunev .exit = tcp_sk_exit, 2914b099ce26SEric W. Biederman .exit_batch = tcp_sk_exit_batch, 2915046ee902SDenis V. Lunev }; 2916046ee902SDenis V. Lunev 29179b0f976fSDenis V. Lunev void __init tcp_v4_init(void) 29181da177e4SLinus Torvalds { 29195caea4eaSEric Dumazet inet_hashinfo_init(&tcp_hashinfo); 29206a1b3054SEric W. Biederman if (register_pernet_subsys(&tcp_sk_ops)) 29211da177e4SLinus Torvalds panic("Failed to create the TCP control socket.\n"); 29221da177e4SLinus Torvalds } 2923