11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * INET An implementation of the TCP/IP protocol suite for the LINUX 31da177e4SLinus Torvalds * operating system. INET is implemented using the BSD Socket 41da177e4SLinus Torvalds * interface as the means of communication with the user level. 51da177e4SLinus Torvalds * 61da177e4SLinus Torvalds * Implementation of the Transmission Control Protocol(TCP). 71da177e4SLinus Torvalds * 81da177e4SLinus Torvalds * IPv4 specific functions 91da177e4SLinus Torvalds * 101da177e4SLinus Torvalds * 111da177e4SLinus Torvalds * code split from: 121da177e4SLinus Torvalds * linux/ipv4/tcp.c 131da177e4SLinus Torvalds * linux/ipv4/tcp_input.c 141da177e4SLinus Torvalds * linux/ipv4/tcp_output.c 151da177e4SLinus Torvalds * 161da177e4SLinus Torvalds * See tcp.c for author information 171da177e4SLinus Torvalds * 181da177e4SLinus Torvalds * This program is free software; you can redistribute it and/or 191da177e4SLinus Torvalds * modify it under the terms of the GNU General Public License 201da177e4SLinus Torvalds * as published by the Free Software Foundation; either version 211da177e4SLinus Torvalds * 2 of the License, or (at your option) any later version. 221da177e4SLinus Torvalds */ 231da177e4SLinus Torvalds 241da177e4SLinus Torvalds /* 251da177e4SLinus Torvalds * Changes: 261da177e4SLinus Torvalds * David S. Miller : New socket lookup architecture. 271da177e4SLinus Torvalds * This code is dedicated to John Dyson. 281da177e4SLinus Torvalds * David S. Miller : Change semantics of established hash, 291da177e4SLinus Torvalds * half is devoted to TIME_WAIT sockets 301da177e4SLinus Torvalds * and the rest go in the other half. 311da177e4SLinus Torvalds * Andi Kleen : Add support for syncookies and fixed 321da177e4SLinus Torvalds * some bugs: ip options weren't passed to 331da177e4SLinus Torvalds * the TCP layer, missed a check for an 341da177e4SLinus Torvalds * ACK bit. 351da177e4SLinus Torvalds * Andi Kleen : Implemented fast path mtu discovery. 361da177e4SLinus Torvalds * Fixed many serious bugs in the 3760236fddSArnaldo Carvalho de Melo * request_sock handling and moved 381da177e4SLinus Torvalds * most of it into the af independent code. 391da177e4SLinus Torvalds * Added tail drop and some other bugfixes. 40caa20d9aSStephen Hemminger * Added new listen semantics. 411da177e4SLinus Torvalds * Mike McLagan : Routing by source 421da177e4SLinus Torvalds * Juan Jose Ciarlante: ip_dynaddr bits 431da177e4SLinus Torvalds * Andi Kleen: various fixes. 441da177e4SLinus Torvalds * Vitaly E. Lavrov : Transparent proxy revived after year 451da177e4SLinus Torvalds * coma. 461da177e4SLinus Torvalds * Andi Kleen : Fix new listen. 471da177e4SLinus Torvalds * Andi Kleen : Fix accept error reporting. 481da177e4SLinus Torvalds * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which 491da177e4SLinus Torvalds * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind 501da177e4SLinus Torvalds * a single port at the same time. 511da177e4SLinus Torvalds */ 521da177e4SLinus Torvalds 531da177e4SLinus Torvalds 54eb4dea58SHerbert Xu #include <linux/bottom_half.h> 551da177e4SLinus Torvalds #include <linux/types.h> 561da177e4SLinus Torvalds #include <linux/fcntl.h> 571da177e4SLinus Torvalds #include <linux/module.h> 581da177e4SLinus Torvalds #include <linux/random.h> 591da177e4SLinus Torvalds #include <linux/cache.h> 601da177e4SLinus Torvalds #include <linux/jhash.h> 611da177e4SLinus Torvalds #include <linux/init.h> 621da177e4SLinus Torvalds #include <linux/times.h> 635a0e3ad6STejun Heo #include <linux/slab.h> 641da177e4SLinus Torvalds 65457c4cbcSEric W. Biederman #include <net/net_namespace.h> 661da177e4SLinus Torvalds #include <net/icmp.h> 67304a1618SArnaldo Carvalho de Melo #include <net/inet_hashtables.h> 681da177e4SLinus Torvalds #include <net/tcp.h> 6920380731SArnaldo Carvalho de Melo #include <net/transp_v6.h> 701da177e4SLinus Torvalds #include <net/ipv6.h> 711da177e4SLinus Torvalds #include <net/inet_common.h> 726d6ee43eSArnaldo Carvalho de Melo #include <net/timewait_sock.h> 731da177e4SLinus Torvalds #include <net/xfrm.h> 741a2449a8SChris Leech #include <net/netdma.h> 756e5714eaSDavid S. Miller #include <net/secure_seq.h> 76d1a4c0b3SGlauber Costa #include <net/tcp_memcontrol.h> 771da177e4SLinus Torvalds 781da177e4SLinus Torvalds #include <linux/inet.h> 791da177e4SLinus Torvalds #include <linux/ipv6.h> 801da177e4SLinus Torvalds #include <linux/stddef.h> 811da177e4SLinus Torvalds #include <linux/proc_fs.h> 821da177e4SLinus Torvalds #include <linux/seq_file.h> 831da177e4SLinus Torvalds 84cfb6eeb4SYOSHIFUJI Hideaki #include <linux/crypto.h> 85cfb6eeb4SYOSHIFUJI Hideaki #include <linux/scatterlist.h> 86cfb6eeb4SYOSHIFUJI Hideaki 87ab32ea5dSBrian Haley int sysctl_tcp_tw_reuse __read_mostly; 88ab32ea5dSBrian Haley int sysctl_tcp_low_latency __read_mostly; 894bc2f18bSEric Dumazet EXPORT_SYMBOL(sysctl_tcp_low_latency); 901da177e4SLinus Torvalds 911da177e4SLinus Torvalds 92cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 93a915da9bSEric Dumazet static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, 94318cf7aaSEric Dumazet __be32 daddr, __be32 saddr, const struct tcphdr *th); 95cfb6eeb4SYOSHIFUJI Hideaki #endif 96cfb6eeb4SYOSHIFUJI Hideaki 975caea4eaSEric Dumazet struct inet_hashinfo tcp_hashinfo; 984bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_hashinfo); 991da177e4SLinus Torvalds 100cf533ea5SEric Dumazet static inline __u32 tcp_v4_init_sequence(const struct sk_buff *skb) 1011da177e4SLinus Torvalds { 102eddc9ec5SArnaldo Carvalho de Melo return secure_tcp_sequence_number(ip_hdr(skb)->daddr, 103eddc9ec5SArnaldo Carvalho de Melo ip_hdr(skb)->saddr, 104aa8223c7SArnaldo Carvalho de Melo tcp_hdr(skb)->dest, 105aa8223c7SArnaldo Carvalho de Melo tcp_hdr(skb)->source); 1061da177e4SLinus Torvalds } 1071da177e4SLinus Torvalds 1086d6ee43eSArnaldo Carvalho de Melo int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) 1096d6ee43eSArnaldo Carvalho de Melo { 1106d6ee43eSArnaldo Carvalho de Melo const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw); 1116d6ee43eSArnaldo Carvalho de Melo struct tcp_sock *tp = tcp_sk(sk); 1126d6ee43eSArnaldo Carvalho de Melo 1136d6ee43eSArnaldo Carvalho de Melo /* With PAWS, it is safe from the viewpoint 1146d6ee43eSArnaldo Carvalho de Melo of data integrity. Even without PAWS it is safe provided sequence 1156d6ee43eSArnaldo Carvalho de Melo spaces do not overlap i.e. at data rates <= 80Mbit/sec. 1166d6ee43eSArnaldo Carvalho de Melo 1176d6ee43eSArnaldo Carvalho de Melo Actually, the idea is close to VJ's one, only timestamp cache is 1186d6ee43eSArnaldo Carvalho de Melo held not per host, but per port pair and TW bucket is used as state 1196d6ee43eSArnaldo Carvalho de Melo holder. 1206d6ee43eSArnaldo Carvalho de Melo 1216d6ee43eSArnaldo Carvalho de Melo If TW bucket has been already destroyed we fall back to VJ's scheme 1226d6ee43eSArnaldo Carvalho de Melo and use initial timestamp retrieved from peer table. 1236d6ee43eSArnaldo Carvalho de Melo */ 1246d6ee43eSArnaldo Carvalho de Melo if (tcptw->tw_ts_recent_stamp && 1256d6ee43eSArnaldo Carvalho de Melo (twp == NULL || (sysctl_tcp_tw_reuse && 1269d729f72SJames Morris get_seconds() - tcptw->tw_ts_recent_stamp > 1))) { 1276d6ee43eSArnaldo Carvalho de Melo tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2; 1286d6ee43eSArnaldo Carvalho de Melo if (tp->write_seq == 0) 1296d6ee43eSArnaldo Carvalho de Melo tp->write_seq = 1; 1306d6ee43eSArnaldo Carvalho de Melo tp->rx_opt.ts_recent = tcptw->tw_ts_recent; 1316d6ee43eSArnaldo Carvalho de Melo tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; 1326d6ee43eSArnaldo Carvalho de Melo sock_hold(sktw); 1336d6ee43eSArnaldo Carvalho de Melo return 1; 1346d6ee43eSArnaldo Carvalho de Melo } 1356d6ee43eSArnaldo Carvalho de Melo 1366d6ee43eSArnaldo Carvalho de Melo return 0; 1376d6ee43eSArnaldo Carvalho de Melo } 1386d6ee43eSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(tcp_twsk_unique); 1396d6ee43eSArnaldo Carvalho de Melo 1401da177e4SLinus Torvalds /* This will initiate an outgoing connection. */ 1411da177e4SLinus Torvalds int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) 1421da177e4SLinus Torvalds { 1432d7192d6SDavid S. Miller struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; 1441da177e4SLinus Torvalds struct inet_sock *inet = inet_sk(sk); 1451da177e4SLinus Torvalds struct tcp_sock *tp = tcp_sk(sk); 146dca8b089SDavid S. Miller __be16 orig_sport, orig_dport; 147bada8adcSAl Viro __be32 daddr, nexthop; 148da905bd1SDavid S. Miller struct flowi4 *fl4; 1492d7192d6SDavid S. Miller struct rtable *rt; 1501da177e4SLinus Torvalds int err; 151f6d8bd05SEric Dumazet struct ip_options_rcu *inet_opt; 1521da177e4SLinus Torvalds 1531da177e4SLinus Torvalds if (addr_len < sizeof(struct sockaddr_in)) 1541da177e4SLinus Torvalds return -EINVAL; 1551da177e4SLinus Torvalds 1561da177e4SLinus Torvalds if (usin->sin_family != AF_INET) 1571da177e4SLinus Torvalds return -EAFNOSUPPORT; 1581da177e4SLinus Torvalds 1591da177e4SLinus Torvalds nexthop = daddr = usin->sin_addr.s_addr; 160f6d8bd05SEric Dumazet inet_opt = rcu_dereference_protected(inet->inet_opt, 161f6d8bd05SEric Dumazet sock_owned_by_user(sk)); 162f6d8bd05SEric Dumazet if (inet_opt && inet_opt->opt.srr) { 1631da177e4SLinus Torvalds if (!daddr) 1641da177e4SLinus Torvalds return -EINVAL; 165f6d8bd05SEric Dumazet nexthop = inet_opt->opt.faddr; 1661da177e4SLinus Torvalds } 1671da177e4SLinus Torvalds 168dca8b089SDavid S. Miller orig_sport = inet->inet_sport; 169dca8b089SDavid S. Miller orig_dport = usin->sin_port; 170da905bd1SDavid S. Miller fl4 = &inet->cork.fl.u.ip4; 171da905bd1SDavid S. Miller rt = ip_route_connect(fl4, nexthop, inet->inet_saddr, 1721da177e4SLinus Torvalds RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, 1731da177e4SLinus Torvalds IPPROTO_TCP, 174abdf7e72SDavid S. Miller orig_sport, orig_dport, sk, true); 175b23dd4feSDavid S. Miller if (IS_ERR(rt)) { 176b23dd4feSDavid S. Miller err = PTR_ERR(rt); 177b23dd4feSDavid S. Miller if (err == -ENETUNREACH) 1787c73a6faSPavel Emelyanov IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); 179b23dd4feSDavid S. Miller return err; 180584bdf8cSWei Dong } 1811da177e4SLinus Torvalds 1821da177e4SLinus Torvalds if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) { 1831da177e4SLinus Torvalds ip_rt_put(rt); 1841da177e4SLinus Torvalds return -ENETUNREACH; 1851da177e4SLinus Torvalds } 1861da177e4SLinus Torvalds 187f6d8bd05SEric Dumazet if (!inet_opt || !inet_opt->opt.srr) 188da905bd1SDavid S. Miller daddr = fl4->daddr; 1891da177e4SLinus Torvalds 190c720c7e8SEric Dumazet if (!inet->inet_saddr) 191da905bd1SDavid S. Miller inet->inet_saddr = fl4->saddr; 192c720c7e8SEric Dumazet inet->inet_rcv_saddr = inet->inet_saddr; 1931da177e4SLinus Torvalds 194c720c7e8SEric Dumazet if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) { 1951da177e4SLinus Torvalds /* Reset inherited state */ 1961da177e4SLinus Torvalds tp->rx_opt.ts_recent = 0; 1971da177e4SLinus Torvalds tp->rx_opt.ts_recent_stamp = 0; 1981da177e4SLinus Torvalds tp->write_seq = 0; 1991da177e4SLinus Torvalds } 2001da177e4SLinus Torvalds 201295ff7edSArnaldo Carvalho de Melo if (tcp_death_row.sysctl_tw_recycle && 202da905bd1SDavid S. Miller !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr) { 203ed2361e6SDavid S. Miller struct inet_peer *peer = rt_get_peer(rt, fl4->daddr); 2047174259eSArnaldo Carvalho de Melo /* 2057174259eSArnaldo Carvalho de Melo * VJ's idea. We save last timestamp seen from 2067174259eSArnaldo Carvalho de Melo * the destination in peer table, when entering state 2077174259eSArnaldo Carvalho de Melo * TIME-WAIT * and initialize rx_opt.ts_recent from it, 2087174259eSArnaldo Carvalho de Melo * when trying new connection. 2091da177e4SLinus Torvalds */ 210317fe0e6SEric Dumazet if (peer) { 211317fe0e6SEric Dumazet inet_peer_refcheck(peer); 212317fe0e6SEric Dumazet if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) { 2131da177e4SLinus Torvalds tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp; 2141da177e4SLinus Torvalds tp->rx_opt.ts_recent = peer->tcp_ts; 2151da177e4SLinus Torvalds } 2161da177e4SLinus Torvalds } 217317fe0e6SEric Dumazet } 2181da177e4SLinus Torvalds 219c720c7e8SEric Dumazet inet->inet_dport = usin->sin_port; 220c720c7e8SEric Dumazet inet->inet_daddr = daddr; 2211da177e4SLinus Torvalds 222d83d8461SArnaldo Carvalho de Melo inet_csk(sk)->icsk_ext_hdr_len = 0; 223f6d8bd05SEric Dumazet if (inet_opt) 224f6d8bd05SEric Dumazet inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen; 2251da177e4SLinus Torvalds 226bee7ca9eSWilliam Allen Simpson tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT; 2271da177e4SLinus Torvalds 2281da177e4SLinus Torvalds /* Socket identity is still unknown (sport may be zero). 2291da177e4SLinus Torvalds * However we set state to SYN-SENT and not releasing socket 2301da177e4SLinus Torvalds * lock select source port, enter ourselves into the hash tables and 2311da177e4SLinus Torvalds * complete initialization after this. 2321da177e4SLinus Torvalds */ 2331da177e4SLinus Torvalds tcp_set_state(sk, TCP_SYN_SENT); 234a7f5e7f1SArnaldo Carvalho de Melo err = inet_hash_connect(&tcp_death_row, sk); 2351da177e4SLinus Torvalds if (err) 2361da177e4SLinus Torvalds goto failure; 2371da177e4SLinus Torvalds 238da905bd1SDavid S. Miller rt = ip_route_newports(fl4, rt, orig_sport, orig_dport, 239c720c7e8SEric Dumazet inet->inet_sport, inet->inet_dport, sk); 240b23dd4feSDavid S. Miller if (IS_ERR(rt)) { 241b23dd4feSDavid S. Miller err = PTR_ERR(rt); 242b23dd4feSDavid S. Miller rt = NULL; 2431da177e4SLinus Torvalds goto failure; 244b23dd4feSDavid S. Miller } 2451da177e4SLinus Torvalds /* OK, now commit destination to socket. */ 246bcd76111SHerbert Xu sk->sk_gso_type = SKB_GSO_TCPV4; 247d8d1f30bSChangli Gao sk_setup_caps(sk, &rt->dst); 2481da177e4SLinus Torvalds 2491da177e4SLinus Torvalds if (!tp->write_seq) 250c720c7e8SEric Dumazet tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr, 251c720c7e8SEric Dumazet inet->inet_daddr, 252c720c7e8SEric Dumazet inet->inet_sport, 2531da177e4SLinus Torvalds usin->sin_port); 2541da177e4SLinus Torvalds 255c720c7e8SEric Dumazet inet->inet_id = tp->write_seq ^ jiffies; 2561da177e4SLinus Torvalds 2571da177e4SLinus Torvalds err = tcp_connect(sk); 2581da177e4SLinus Torvalds rt = NULL; 2591da177e4SLinus Torvalds if (err) 2601da177e4SLinus Torvalds goto failure; 2611da177e4SLinus Torvalds 2621da177e4SLinus Torvalds return 0; 2631da177e4SLinus Torvalds 2641da177e4SLinus Torvalds failure: 2657174259eSArnaldo Carvalho de Melo /* 2667174259eSArnaldo Carvalho de Melo * This unhashes the socket and releases the local port, 2677174259eSArnaldo Carvalho de Melo * if necessary. 2687174259eSArnaldo Carvalho de Melo */ 2691da177e4SLinus Torvalds tcp_set_state(sk, TCP_CLOSE); 2701da177e4SLinus Torvalds ip_rt_put(rt); 2711da177e4SLinus Torvalds sk->sk_route_caps = 0; 272c720c7e8SEric Dumazet inet->inet_dport = 0; 2731da177e4SLinus Torvalds return err; 2741da177e4SLinus Torvalds } 2754bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_connect); 2761da177e4SLinus Torvalds 2771da177e4SLinus Torvalds /* 2781da177e4SLinus Torvalds * This routine does path mtu discovery as defined in RFC1191. 2791da177e4SLinus Torvalds */ 280b71d1d42SEric Dumazet static void do_pmtu_discovery(struct sock *sk, const struct iphdr *iph, u32 mtu) 2811da177e4SLinus Torvalds { 2821da177e4SLinus Torvalds struct dst_entry *dst; 2831da177e4SLinus Torvalds struct inet_sock *inet = inet_sk(sk); 2841da177e4SLinus Torvalds 2851da177e4SLinus Torvalds /* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs 2861da177e4SLinus Torvalds * send out by Linux are always <576bytes so they should go through 2871da177e4SLinus Torvalds * unfragmented). 2881da177e4SLinus Torvalds */ 2891da177e4SLinus Torvalds if (sk->sk_state == TCP_LISTEN) 2901da177e4SLinus Torvalds return; 2911da177e4SLinus Torvalds 2921da177e4SLinus Torvalds /* We don't check in the destentry if pmtu discovery is forbidden 2931da177e4SLinus Torvalds * on this route. We just assume that no packet_to_big packets 2941da177e4SLinus Torvalds * are send back when pmtu discovery is not active. 2951da177e4SLinus Torvalds * There is a small race when the user changes this flag in the 2961da177e4SLinus Torvalds * route, but I think that's acceptable. 2971da177e4SLinus Torvalds */ 2981da177e4SLinus Torvalds if ((dst = __sk_dst_check(sk, 0)) == NULL) 2991da177e4SLinus Torvalds return; 3001da177e4SLinus Torvalds 3011da177e4SLinus Torvalds dst->ops->update_pmtu(dst, mtu); 3021da177e4SLinus Torvalds 3031da177e4SLinus Torvalds /* Something is about to be wrong... Remember soft error 3041da177e4SLinus Torvalds * for the case, if this connection will not able to recover. 3051da177e4SLinus Torvalds */ 3061da177e4SLinus Torvalds if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst)) 3071da177e4SLinus Torvalds sk->sk_err_soft = EMSGSIZE; 3081da177e4SLinus Torvalds 3091da177e4SLinus Torvalds mtu = dst_mtu(dst); 3101da177e4SLinus Torvalds 3111da177e4SLinus Torvalds if (inet->pmtudisc != IP_PMTUDISC_DONT && 312d83d8461SArnaldo Carvalho de Melo inet_csk(sk)->icsk_pmtu_cookie > mtu) { 3131da177e4SLinus Torvalds tcp_sync_mss(sk, mtu); 3141da177e4SLinus Torvalds 3151da177e4SLinus Torvalds /* Resend the TCP packet because it's 3161da177e4SLinus Torvalds * clear that the old packet has been 3171da177e4SLinus Torvalds * dropped. This is the new "fast" path mtu 3181da177e4SLinus Torvalds * discovery. 3191da177e4SLinus Torvalds */ 3201da177e4SLinus Torvalds tcp_simple_retransmit(sk); 3211da177e4SLinus Torvalds } /* else let the usual retransmit timer handle it */ 3221da177e4SLinus Torvalds } 3231da177e4SLinus Torvalds 3241da177e4SLinus Torvalds /* 3251da177e4SLinus Torvalds * This routine is called by the ICMP module when it gets some 3261da177e4SLinus Torvalds * sort of error condition. If err < 0 then the socket should 3271da177e4SLinus Torvalds * be closed and the error returned to the user. If err > 0 3281da177e4SLinus Torvalds * it's just the icmp type << 8 | icmp code. After adjustment 3291da177e4SLinus Torvalds * header points to the first 8 bytes of the tcp header. We need 3301da177e4SLinus Torvalds * to find the appropriate port. 3311da177e4SLinus Torvalds * 3321da177e4SLinus Torvalds * The locking strategy used here is very "optimistic". When 3331da177e4SLinus Torvalds * someone else accesses the socket the ICMP is just dropped 3341da177e4SLinus Torvalds * and for some paths there is no check at all. 3351da177e4SLinus Torvalds * A more general error queue to queue errors for later handling 3361da177e4SLinus Torvalds * is probably better. 3371da177e4SLinus Torvalds * 3381da177e4SLinus Torvalds */ 3391da177e4SLinus Torvalds 3404d1a2d9eSDamian Lukowski void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) 3411da177e4SLinus Torvalds { 342b71d1d42SEric Dumazet const struct iphdr *iph = (const struct iphdr *)icmp_skb->data; 3434d1a2d9eSDamian Lukowski struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2)); 344f1ecd5d9SDamian Lukowski struct inet_connection_sock *icsk; 3451da177e4SLinus Torvalds struct tcp_sock *tp; 3461da177e4SLinus Torvalds struct inet_sock *inet; 3474d1a2d9eSDamian Lukowski const int type = icmp_hdr(icmp_skb)->type; 3484d1a2d9eSDamian Lukowski const int code = icmp_hdr(icmp_skb)->code; 3491da177e4SLinus Torvalds struct sock *sk; 350f1ecd5d9SDamian Lukowski struct sk_buff *skb; 3511da177e4SLinus Torvalds __u32 seq; 352f1ecd5d9SDamian Lukowski __u32 remaining; 3531da177e4SLinus Torvalds int err; 3544d1a2d9eSDamian Lukowski struct net *net = dev_net(icmp_skb->dev); 3551da177e4SLinus Torvalds 3564d1a2d9eSDamian Lukowski if (icmp_skb->len < (iph->ihl << 2) + 8) { 357dcfc23caSPavel Emelyanov ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); 3581da177e4SLinus Torvalds return; 3591da177e4SLinus Torvalds } 3601da177e4SLinus Torvalds 361fd54d716SPavel Emelyanov sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest, 3624d1a2d9eSDamian Lukowski iph->saddr, th->source, inet_iif(icmp_skb)); 3631da177e4SLinus Torvalds if (!sk) { 364dcfc23caSPavel Emelyanov ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); 3651da177e4SLinus Torvalds return; 3661da177e4SLinus Torvalds } 3671da177e4SLinus Torvalds if (sk->sk_state == TCP_TIME_WAIT) { 3689469c7b4SYOSHIFUJI Hideaki inet_twsk_put(inet_twsk(sk)); 3691da177e4SLinus Torvalds return; 3701da177e4SLinus Torvalds } 3711da177e4SLinus Torvalds 3721da177e4SLinus Torvalds bh_lock_sock(sk); 3731da177e4SLinus Torvalds /* If too many ICMPs get dropped on busy 3741da177e4SLinus Torvalds * servers this needs to be solved differently. 3751da177e4SLinus Torvalds */ 3761da177e4SLinus Torvalds if (sock_owned_by_user(sk)) 377de0744afSPavel Emelyanov NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); 3781da177e4SLinus Torvalds 3791da177e4SLinus Torvalds if (sk->sk_state == TCP_CLOSE) 3801da177e4SLinus Torvalds goto out; 3811da177e4SLinus Torvalds 38297e3ecd1Sstephen hemminger if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) { 38397e3ecd1Sstephen hemminger NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP); 38497e3ecd1Sstephen hemminger goto out; 38597e3ecd1Sstephen hemminger } 38697e3ecd1Sstephen hemminger 387f1ecd5d9SDamian Lukowski icsk = inet_csk(sk); 3881da177e4SLinus Torvalds tp = tcp_sk(sk); 3891da177e4SLinus Torvalds seq = ntohl(th->seq); 3901da177e4SLinus Torvalds if (sk->sk_state != TCP_LISTEN && 3911da177e4SLinus Torvalds !between(seq, tp->snd_una, tp->snd_nxt)) { 392de0744afSPavel Emelyanov NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); 3931da177e4SLinus Torvalds goto out; 3941da177e4SLinus Torvalds } 3951da177e4SLinus Torvalds 3961da177e4SLinus Torvalds switch (type) { 3971da177e4SLinus Torvalds case ICMP_SOURCE_QUENCH: 3981da177e4SLinus Torvalds /* Just silently ignore these. */ 3991da177e4SLinus Torvalds goto out; 4001da177e4SLinus Torvalds case ICMP_PARAMETERPROB: 4011da177e4SLinus Torvalds err = EPROTO; 4021da177e4SLinus Torvalds break; 4031da177e4SLinus Torvalds case ICMP_DEST_UNREACH: 4041da177e4SLinus Torvalds if (code > NR_ICMP_UNREACH) 4051da177e4SLinus Torvalds goto out; 4061da177e4SLinus Torvalds 4071da177e4SLinus Torvalds if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */ 4081da177e4SLinus Torvalds if (!sock_owned_by_user(sk)) 4091da177e4SLinus Torvalds do_pmtu_discovery(sk, iph, info); 4101da177e4SLinus Torvalds goto out; 4111da177e4SLinus Torvalds } 4121da177e4SLinus Torvalds 4131da177e4SLinus Torvalds err = icmp_err_convert[code].errno; 414f1ecd5d9SDamian Lukowski /* check if icmp_skb allows revert of backoff 415f1ecd5d9SDamian Lukowski * (see draft-zimmermann-tcp-lcd) */ 416f1ecd5d9SDamian Lukowski if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH) 417f1ecd5d9SDamian Lukowski break; 418f1ecd5d9SDamian Lukowski if (seq != tp->snd_una || !icsk->icsk_retransmits || 419f1ecd5d9SDamian Lukowski !icsk->icsk_backoff) 420f1ecd5d9SDamian Lukowski break; 421f1ecd5d9SDamian Lukowski 4228f49c270SDavid S. Miller if (sock_owned_by_user(sk)) 4238f49c270SDavid S. Miller break; 4248f49c270SDavid S. Miller 425f1ecd5d9SDamian Lukowski icsk->icsk_backoff--; 4269ad7c049SJerry Chu inet_csk(sk)->icsk_rto = (tp->srtt ? __tcp_set_rto(tp) : 4279ad7c049SJerry Chu TCP_TIMEOUT_INIT) << icsk->icsk_backoff; 428f1ecd5d9SDamian Lukowski tcp_bound_rto(sk); 429f1ecd5d9SDamian Lukowski 430f1ecd5d9SDamian Lukowski skb = tcp_write_queue_head(sk); 431f1ecd5d9SDamian Lukowski BUG_ON(!skb); 432f1ecd5d9SDamian Lukowski 433f1ecd5d9SDamian Lukowski remaining = icsk->icsk_rto - min(icsk->icsk_rto, 434f1ecd5d9SDamian Lukowski tcp_time_stamp - TCP_SKB_CB(skb)->when); 435f1ecd5d9SDamian Lukowski 436f1ecd5d9SDamian Lukowski if (remaining) { 437f1ecd5d9SDamian Lukowski inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, 438f1ecd5d9SDamian Lukowski remaining, TCP_RTO_MAX); 439f1ecd5d9SDamian Lukowski } else { 440f1ecd5d9SDamian Lukowski /* RTO revert clocked out retransmission. 441f1ecd5d9SDamian Lukowski * Will retransmit now */ 442f1ecd5d9SDamian Lukowski tcp_retransmit_timer(sk); 443f1ecd5d9SDamian Lukowski } 444f1ecd5d9SDamian Lukowski 4451da177e4SLinus Torvalds break; 4461da177e4SLinus Torvalds case ICMP_TIME_EXCEEDED: 4471da177e4SLinus Torvalds err = EHOSTUNREACH; 4481da177e4SLinus Torvalds break; 4491da177e4SLinus Torvalds default: 4501da177e4SLinus Torvalds goto out; 4511da177e4SLinus Torvalds } 4521da177e4SLinus Torvalds 4531da177e4SLinus Torvalds switch (sk->sk_state) { 45460236fddSArnaldo Carvalho de Melo struct request_sock *req, **prev; 4551da177e4SLinus Torvalds case TCP_LISTEN: 4561da177e4SLinus Torvalds if (sock_owned_by_user(sk)) 4571da177e4SLinus Torvalds goto out; 4581da177e4SLinus Torvalds 459463c84b9SArnaldo Carvalho de Melo req = inet_csk_search_req(sk, &prev, th->dest, 4601da177e4SLinus Torvalds iph->daddr, iph->saddr); 4611da177e4SLinus Torvalds if (!req) 4621da177e4SLinus Torvalds goto out; 4631da177e4SLinus Torvalds 4641da177e4SLinus Torvalds /* ICMPs are not backlogged, hence we cannot get 4651da177e4SLinus Torvalds an established socket here. 4661da177e4SLinus Torvalds */ 467547b792cSIlpo Järvinen WARN_ON(req->sk); 4681da177e4SLinus Torvalds 4692e6599cbSArnaldo Carvalho de Melo if (seq != tcp_rsk(req)->snt_isn) { 470de0744afSPavel Emelyanov NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); 4711da177e4SLinus Torvalds goto out; 4721da177e4SLinus Torvalds } 4731da177e4SLinus Torvalds 4741da177e4SLinus Torvalds /* 4751da177e4SLinus Torvalds * Still in SYN_RECV, just remove it silently. 4761da177e4SLinus Torvalds * There is no good way to pass the error to the newly 4771da177e4SLinus Torvalds * created socket, and POSIX does not want network 4781da177e4SLinus Torvalds * errors returned from accept(). 4791da177e4SLinus Torvalds */ 480463c84b9SArnaldo Carvalho de Melo inet_csk_reqsk_queue_drop(sk, req, prev); 4811da177e4SLinus Torvalds goto out; 4821da177e4SLinus Torvalds 4831da177e4SLinus Torvalds case TCP_SYN_SENT: 4841da177e4SLinus Torvalds case TCP_SYN_RECV: /* Cannot happen. 4851da177e4SLinus Torvalds It can f.e. if SYNs crossed. 4861da177e4SLinus Torvalds */ 4871da177e4SLinus Torvalds if (!sock_owned_by_user(sk)) { 4881da177e4SLinus Torvalds sk->sk_err = err; 4891da177e4SLinus Torvalds 4901da177e4SLinus Torvalds sk->sk_error_report(sk); 4911da177e4SLinus Torvalds 4921da177e4SLinus Torvalds tcp_done(sk); 4931da177e4SLinus Torvalds } else { 4941da177e4SLinus Torvalds sk->sk_err_soft = err; 4951da177e4SLinus Torvalds } 4961da177e4SLinus Torvalds goto out; 4971da177e4SLinus Torvalds } 4981da177e4SLinus Torvalds 4991da177e4SLinus Torvalds /* If we've already connected we will keep trying 5001da177e4SLinus Torvalds * until we time out, or the user gives up. 5011da177e4SLinus Torvalds * 5021da177e4SLinus Torvalds * rfc1122 4.2.3.9 allows to consider as hard errors 5031da177e4SLinus Torvalds * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too, 5041da177e4SLinus Torvalds * but it is obsoleted by pmtu discovery). 5051da177e4SLinus Torvalds * 5061da177e4SLinus Torvalds * Note, that in modern internet, where routing is unreliable 5071da177e4SLinus Torvalds * and in each dark corner broken firewalls sit, sending random 5081da177e4SLinus Torvalds * errors ordered by their masters even this two messages finally lose 5091da177e4SLinus Torvalds * their original sense (even Linux sends invalid PORT_UNREACHs) 5101da177e4SLinus Torvalds * 5111da177e4SLinus Torvalds * Now we are in compliance with RFCs. 5121da177e4SLinus Torvalds * --ANK (980905) 5131da177e4SLinus Torvalds */ 5141da177e4SLinus Torvalds 5151da177e4SLinus Torvalds inet = inet_sk(sk); 5161da177e4SLinus Torvalds if (!sock_owned_by_user(sk) && inet->recverr) { 5171da177e4SLinus Torvalds sk->sk_err = err; 5181da177e4SLinus Torvalds sk->sk_error_report(sk); 5191da177e4SLinus Torvalds } else { /* Only an error on timeout */ 5201da177e4SLinus Torvalds sk->sk_err_soft = err; 5211da177e4SLinus Torvalds } 5221da177e4SLinus Torvalds 5231da177e4SLinus Torvalds out: 5241da177e4SLinus Torvalds bh_unlock_sock(sk); 5251da177e4SLinus Torvalds sock_put(sk); 5261da177e4SLinus Torvalds } 5271da177e4SLinus Torvalds 528419f9f89SHerbert Xu static void __tcp_v4_send_check(struct sk_buff *skb, 529419f9f89SHerbert Xu __be32 saddr, __be32 daddr) 5301da177e4SLinus Torvalds { 531aa8223c7SArnaldo Carvalho de Melo struct tcphdr *th = tcp_hdr(skb); 5321da177e4SLinus Torvalds 53384fa7933SPatrick McHardy if (skb->ip_summed == CHECKSUM_PARTIAL) { 534419f9f89SHerbert Xu th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0); 535663ead3bSHerbert Xu skb->csum_start = skb_transport_header(skb) - skb->head; 536ff1dcadbSAl Viro skb->csum_offset = offsetof(struct tcphdr, check); 5371da177e4SLinus Torvalds } else { 538419f9f89SHerbert Xu th->check = tcp_v4_check(skb->len, saddr, daddr, 53907f0757aSJoe Perches csum_partial(th, 5401da177e4SLinus Torvalds th->doff << 2, 5411da177e4SLinus Torvalds skb->csum)); 5421da177e4SLinus Torvalds } 5431da177e4SLinus Torvalds } 5441da177e4SLinus Torvalds 545419f9f89SHerbert Xu /* This routine computes an IPv4 TCP checksum. */ 546bb296246SHerbert Xu void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb) 547419f9f89SHerbert Xu { 548cf533ea5SEric Dumazet const struct inet_sock *inet = inet_sk(sk); 549419f9f89SHerbert Xu 550419f9f89SHerbert Xu __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr); 551419f9f89SHerbert Xu } 5524bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_send_check); 553419f9f89SHerbert Xu 554a430a43dSHerbert Xu int tcp_v4_gso_send_check(struct sk_buff *skb) 555a430a43dSHerbert Xu { 556eddc9ec5SArnaldo Carvalho de Melo const struct iphdr *iph; 557a430a43dSHerbert Xu struct tcphdr *th; 558a430a43dSHerbert Xu 559a430a43dSHerbert Xu if (!pskb_may_pull(skb, sizeof(*th))) 560a430a43dSHerbert Xu return -EINVAL; 561a430a43dSHerbert Xu 562eddc9ec5SArnaldo Carvalho de Melo iph = ip_hdr(skb); 563aa8223c7SArnaldo Carvalho de Melo th = tcp_hdr(skb); 564a430a43dSHerbert Xu 565a430a43dSHerbert Xu th->check = 0; 56684fa7933SPatrick McHardy skb->ip_summed = CHECKSUM_PARTIAL; 567419f9f89SHerbert Xu __tcp_v4_send_check(skb, iph->saddr, iph->daddr); 568a430a43dSHerbert Xu return 0; 569a430a43dSHerbert Xu } 570a430a43dSHerbert Xu 5711da177e4SLinus Torvalds /* 5721da177e4SLinus Torvalds * This routine will send an RST to the other tcp. 5731da177e4SLinus Torvalds * 5741da177e4SLinus Torvalds * Someone asks: why I NEVER use socket parameters (TOS, TTL etc.) 5751da177e4SLinus Torvalds * for reset. 5761da177e4SLinus Torvalds * Answer: if a packet caused RST, it is not for a socket 5771da177e4SLinus Torvalds * existing in our system, if it is matched to a socket, 5781da177e4SLinus Torvalds * it is just duplicate segment or bug in other side's TCP. 5791da177e4SLinus Torvalds * So that we build reply only basing on parameters 5801da177e4SLinus Torvalds * arrived with segment. 5811da177e4SLinus Torvalds * Exception: precedence violation. We do not implement it in any case. 5821da177e4SLinus Torvalds */ 5831da177e4SLinus Torvalds 584cfb6eeb4SYOSHIFUJI Hideaki static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) 5851da177e4SLinus Torvalds { 586cf533ea5SEric Dumazet const struct tcphdr *th = tcp_hdr(skb); 587cfb6eeb4SYOSHIFUJI Hideaki struct { 588cfb6eeb4SYOSHIFUJI Hideaki struct tcphdr th; 589cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 590714e85beSAl Viro __be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)]; 591cfb6eeb4SYOSHIFUJI Hideaki #endif 592cfb6eeb4SYOSHIFUJI Hideaki } rep; 5931da177e4SLinus Torvalds struct ip_reply_arg arg; 594cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 595cfb6eeb4SYOSHIFUJI Hideaki struct tcp_md5sig_key *key; 596658ddaafSShawn Lu const __u8 *hash_location = NULL; 597658ddaafSShawn Lu unsigned char newhash[16]; 598658ddaafSShawn Lu int genhash; 599658ddaafSShawn Lu struct sock *sk1 = NULL; 600cfb6eeb4SYOSHIFUJI Hideaki #endif 601a86b1e30SPavel Emelyanov struct net *net; 6021da177e4SLinus Torvalds 6031da177e4SLinus Torvalds /* Never send a reset in response to a reset. */ 6041da177e4SLinus Torvalds if (th->rst) 6051da177e4SLinus Torvalds return; 6061da177e4SLinus Torvalds 607511c3f92SEric Dumazet if (skb_rtable(skb)->rt_type != RTN_LOCAL) 6081da177e4SLinus Torvalds return; 6091da177e4SLinus Torvalds 6101da177e4SLinus Torvalds /* Swap the send and the receive. */ 611cfb6eeb4SYOSHIFUJI Hideaki memset(&rep, 0, sizeof(rep)); 612cfb6eeb4SYOSHIFUJI Hideaki rep.th.dest = th->source; 613cfb6eeb4SYOSHIFUJI Hideaki rep.th.source = th->dest; 614cfb6eeb4SYOSHIFUJI Hideaki rep.th.doff = sizeof(struct tcphdr) / 4; 615cfb6eeb4SYOSHIFUJI Hideaki rep.th.rst = 1; 6161da177e4SLinus Torvalds 6171da177e4SLinus Torvalds if (th->ack) { 618cfb6eeb4SYOSHIFUJI Hideaki rep.th.seq = th->ack_seq; 6191da177e4SLinus Torvalds } else { 620cfb6eeb4SYOSHIFUJI Hideaki rep.th.ack = 1; 621cfb6eeb4SYOSHIFUJI Hideaki rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin + 6221da177e4SLinus Torvalds skb->len - (th->doff << 2)); 6231da177e4SLinus Torvalds } 6241da177e4SLinus Torvalds 6257174259eSArnaldo Carvalho de Melo memset(&arg, 0, sizeof(arg)); 626cfb6eeb4SYOSHIFUJI Hideaki arg.iov[0].iov_base = (unsigned char *)&rep; 627cfb6eeb4SYOSHIFUJI Hideaki arg.iov[0].iov_len = sizeof(rep.th); 628cfb6eeb4SYOSHIFUJI Hideaki 629cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 630658ddaafSShawn Lu hash_location = tcp_parse_md5sig_option(th); 631658ddaafSShawn Lu if (!sk && hash_location) { 632658ddaafSShawn Lu /* 633658ddaafSShawn Lu * active side is lost. Try to find listening socket through 634658ddaafSShawn Lu * source port, and then find md5 key through listening socket. 635658ddaafSShawn Lu * we are not loose security here: 636658ddaafSShawn Lu * Incoming packet is checked with md5 hash with finding key, 637658ddaafSShawn Lu * no RST generated if md5 hash doesn't match. 638658ddaafSShawn Lu */ 639658ddaafSShawn Lu sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev), 640658ddaafSShawn Lu &tcp_hashinfo, ip_hdr(skb)->daddr, 641658ddaafSShawn Lu ntohs(th->source), inet_iif(skb)); 642658ddaafSShawn Lu /* don't send rst if it can't find key */ 643658ddaafSShawn Lu if (!sk1) 644658ddaafSShawn Lu return; 645658ddaafSShawn Lu rcu_read_lock(); 646658ddaafSShawn Lu key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *) 647658ddaafSShawn Lu &ip_hdr(skb)->saddr, AF_INET); 648658ddaafSShawn Lu if (!key) 649658ddaafSShawn Lu goto release_sk1; 650658ddaafSShawn Lu 651658ddaafSShawn Lu genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, NULL, skb); 652658ddaafSShawn Lu if (genhash || memcmp(hash_location, newhash, 16) != 0) 653658ddaafSShawn Lu goto release_sk1; 654658ddaafSShawn Lu } else { 655658ddaafSShawn Lu key = sk ? tcp_md5_do_lookup(sk, (union tcp_md5_addr *) 656658ddaafSShawn Lu &ip_hdr(skb)->saddr, 657a915da9bSEric Dumazet AF_INET) : NULL; 658658ddaafSShawn Lu } 659658ddaafSShawn Lu 660cfb6eeb4SYOSHIFUJI Hideaki if (key) { 661cfb6eeb4SYOSHIFUJI Hideaki rep.opt[0] = htonl((TCPOPT_NOP << 24) | 662cfb6eeb4SYOSHIFUJI Hideaki (TCPOPT_NOP << 16) | 663cfb6eeb4SYOSHIFUJI Hideaki (TCPOPT_MD5SIG << 8) | 664cfb6eeb4SYOSHIFUJI Hideaki TCPOLEN_MD5SIG); 665cfb6eeb4SYOSHIFUJI Hideaki /* Update length and the length the header thinks exists */ 666cfb6eeb4SYOSHIFUJI Hideaki arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED; 667cfb6eeb4SYOSHIFUJI Hideaki rep.th.doff = arg.iov[0].iov_len / 4; 668cfb6eeb4SYOSHIFUJI Hideaki 66949a72dfbSAdam Langley tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1], 67078e645cbSIlpo Järvinen key, ip_hdr(skb)->saddr, 67178e645cbSIlpo Järvinen ip_hdr(skb)->daddr, &rep.th); 672cfb6eeb4SYOSHIFUJI Hideaki } 673cfb6eeb4SYOSHIFUJI Hideaki #endif 674eddc9ec5SArnaldo Carvalho de Melo arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr, 675eddc9ec5SArnaldo Carvalho de Melo ip_hdr(skb)->saddr, /* XXX */ 67652cd5750SIlpo Järvinen arg.iov[0].iov_len, IPPROTO_TCP, 0); 6771da177e4SLinus Torvalds arg.csumoffset = offsetof(struct tcphdr, check) / 2; 67888ef4a5aSKOVACS Krisztian arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0; 679e2446eaaSShawn Lu /* When socket is gone, all binding information is lost. 680e2446eaaSShawn Lu * routing might fail in this case. using iif for oif to 681e2446eaaSShawn Lu * make sure we can deliver it 682e2446eaaSShawn Lu */ 683e2446eaaSShawn Lu arg.bound_dev_if = sk ? sk->sk_bound_dev_if : inet_iif(skb); 6841da177e4SLinus Torvalds 685adf30907SEric Dumazet net = dev_net(skb_dst(skb)->dev); 68666b13d99SEric Dumazet arg.tos = ip_hdr(skb)->tos; 6870a5ebb80SDavid S. Miller ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr, 6887feb49c8SDenis V. Lunev &arg, arg.iov[0].iov_len); 6891da177e4SLinus Torvalds 69063231bddSPavel Emelyanov TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); 69163231bddSPavel Emelyanov TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS); 692658ddaafSShawn Lu 693658ddaafSShawn Lu #ifdef CONFIG_TCP_MD5SIG 694658ddaafSShawn Lu release_sk1: 695658ddaafSShawn Lu if (sk1) { 696658ddaafSShawn Lu rcu_read_unlock(); 697658ddaafSShawn Lu sock_put(sk1); 698658ddaafSShawn Lu } 699658ddaafSShawn Lu #endif 7001da177e4SLinus Torvalds } 7011da177e4SLinus Torvalds 7021da177e4SLinus Torvalds /* The code following below sending ACKs in SYN-RECV and TIME-WAIT states 7031da177e4SLinus Torvalds outside socket context is ugly, certainly. What can I do? 7041da177e4SLinus Torvalds */ 7051da177e4SLinus Torvalds 7069501f972SYOSHIFUJI Hideaki static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, 7079501f972SYOSHIFUJI Hideaki u32 win, u32 ts, int oif, 70888ef4a5aSKOVACS Krisztian struct tcp_md5sig_key *key, 70966b13d99SEric Dumazet int reply_flags, u8 tos) 7101da177e4SLinus Torvalds { 711cf533ea5SEric Dumazet const struct tcphdr *th = tcp_hdr(skb); 7121da177e4SLinus Torvalds struct { 7131da177e4SLinus Torvalds struct tcphdr th; 714714e85beSAl Viro __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2) 715cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 716cfb6eeb4SYOSHIFUJI Hideaki + (TCPOLEN_MD5SIG_ALIGNED >> 2) 717cfb6eeb4SYOSHIFUJI Hideaki #endif 718cfb6eeb4SYOSHIFUJI Hideaki ]; 7191da177e4SLinus Torvalds } rep; 7201da177e4SLinus Torvalds struct ip_reply_arg arg; 721adf30907SEric Dumazet struct net *net = dev_net(skb_dst(skb)->dev); 7221da177e4SLinus Torvalds 7231da177e4SLinus Torvalds memset(&rep.th, 0, sizeof(struct tcphdr)); 7247174259eSArnaldo Carvalho de Melo memset(&arg, 0, sizeof(arg)); 7251da177e4SLinus Torvalds 7261da177e4SLinus Torvalds arg.iov[0].iov_base = (unsigned char *)&rep; 7271da177e4SLinus Torvalds arg.iov[0].iov_len = sizeof(rep.th); 7281da177e4SLinus Torvalds if (ts) { 729cfb6eeb4SYOSHIFUJI Hideaki rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | 7301da177e4SLinus Torvalds (TCPOPT_TIMESTAMP << 8) | 7311da177e4SLinus Torvalds TCPOLEN_TIMESTAMP); 732cfb6eeb4SYOSHIFUJI Hideaki rep.opt[1] = htonl(tcp_time_stamp); 733cfb6eeb4SYOSHIFUJI Hideaki rep.opt[2] = htonl(ts); 734cb48cfe8SCraig Schlenter arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED; 7351da177e4SLinus Torvalds } 7361da177e4SLinus Torvalds 7371da177e4SLinus Torvalds /* Swap the send and the receive. */ 7381da177e4SLinus Torvalds rep.th.dest = th->source; 7391da177e4SLinus Torvalds rep.th.source = th->dest; 7401da177e4SLinus Torvalds rep.th.doff = arg.iov[0].iov_len / 4; 7411da177e4SLinus Torvalds rep.th.seq = htonl(seq); 7421da177e4SLinus Torvalds rep.th.ack_seq = htonl(ack); 7431da177e4SLinus Torvalds rep.th.ack = 1; 7441da177e4SLinus Torvalds rep.th.window = htons(win); 7451da177e4SLinus Torvalds 746cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 747cfb6eeb4SYOSHIFUJI Hideaki if (key) { 748cfb6eeb4SYOSHIFUJI Hideaki int offset = (ts) ? 3 : 0; 749cfb6eeb4SYOSHIFUJI Hideaki 750cfb6eeb4SYOSHIFUJI Hideaki rep.opt[offset++] = htonl((TCPOPT_NOP << 24) | 751cfb6eeb4SYOSHIFUJI Hideaki (TCPOPT_NOP << 16) | 752cfb6eeb4SYOSHIFUJI Hideaki (TCPOPT_MD5SIG << 8) | 753cfb6eeb4SYOSHIFUJI Hideaki TCPOLEN_MD5SIG); 754cfb6eeb4SYOSHIFUJI Hideaki arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED; 755cfb6eeb4SYOSHIFUJI Hideaki rep.th.doff = arg.iov[0].iov_len/4; 756cfb6eeb4SYOSHIFUJI Hideaki 75749a72dfbSAdam Langley tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset], 75890b7e112SAdam Langley key, ip_hdr(skb)->saddr, 75990b7e112SAdam Langley ip_hdr(skb)->daddr, &rep.th); 760cfb6eeb4SYOSHIFUJI Hideaki } 761cfb6eeb4SYOSHIFUJI Hideaki #endif 76288ef4a5aSKOVACS Krisztian arg.flags = reply_flags; 763eddc9ec5SArnaldo Carvalho de Melo arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr, 764eddc9ec5SArnaldo Carvalho de Melo ip_hdr(skb)->saddr, /* XXX */ 7651da177e4SLinus Torvalds arg.iov[0].iov_len, IPPROTO_TCP, 0); 7661da177e4SLinus Torvalds arg.csumoffset = offsetof(struct tcphdr, check) / 2; 7679501f972SYOSHIFUJI Hideaki if (oif) 7689501f972SYOSHIFUJI Hideaki arg.bound_dev_if = oif; 76966b13d99SEric Dumazet arg.tos = tos; 7700a5ebb80SDavid S. Miller ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr, 7717feb49c8SDenis V. Lunev &arg, arg.iov[0].iov_len); 7721da177e4SLinus Torvalds 77363231bddSPavel Emelyanov TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); 7741da177e4SLinus Torvalds } 7751da177e4SLinus Torvalds 7761da177e4SLinus Torvalds static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) 7771da177e4SLinus Torvalds { 7788feaf0c0SArnaldo Carvalho de Melo struct inet_timewait_sock *tw = inet_twsk(sk); 779cfb6eeb4SYOSHIFUJI Hideaki struct tcp_timewait_sock *tcptw = tcp_twsk(sk); 7801da177e4SLinus Torvalds 7819501f972SYOSHIFUJI Hideaki tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, 7827174259eSArnaldo Carvalho de Melo tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, 7839501f972SYOSHIFUJI Hideaki tcptw->tw_ts_recent, 7849501f972SYOSHIFUJI Hideaki tw->tw_bound_dev_if, 78588ef4a5aSKOVACS Krisztian tcp_twsk_md5_key(tcptw), 78666b13d99SEric Dumazet tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0, 78766b13d99SEric Dumazet tw->tw_tos 7889501f972SYOSHIFUJI Hideaki ); 7891da177e4SLinus Torvalds 7908feaf0c0SArnaldo Carvalho de Melo inet_twsk_put(tw); 7911da177e4SLinus Torvalds } 7921da177e4SLinus Torvalds 7936edafaafSGui Jianfeng static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, 7947174259eSArnaldo Carvalho de Melo struct request_sock *req) 7951da177e4SLinus Torvalds { 7969501f972SYOSHIFUJI Hideaki tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1, 797cfb6eeb4SYOSHIFUJI Hideaki tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, 7989501f972SYOSHIFUJI Hideaki req->ts_recent, 7999501f972SYOSHIFUJI Hideaki 0, 800a915da9bSEric Dumazet tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr, 801a915da9bSEric Dumazet AF_INET), 80266b13d99SEric Dumazet inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0, 80366b13d99SEric Dumazet ip_hdr(skb)->tos); 8041da177e4SLinus Torvalds } 8051da177e4SLinus Torvalds 8061da177e4SLinus Torvalds /* 8079bf1d83eSKris Katterjohn * Send a SYN-ACK after having received a SYN. 80860236fddSArnaldo Carvalho de Melo * This still operates on a request_sock only, not on a big 8091da177e4SLinus Torvalds * socket. 8101da177e4SLinus Torvalds */ 81172659eccSOctavian Purdila static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, 812e6b4d113SWilliam Allen Simpson struct request_sock *req, 813e6b4d113SWilliam Allen Simpson struct request_values *rvp) 8141da177e4SLinus Torvalds { 8152e6599cbSArnaldo Carvalho de Melo const struct inet_request_sock *ireq = inet_rsk(req); 8166bd023f3SDavid S. Miller struct flowi4 fl4; 8171da177e4SLinus Torvalds int err = -1; 8181da177e4SLinus Torvalds struct sk_buff * skb; 8191da177e4SLinus Torvalds 8201da177e4SLinus Torvalds /* First, grab a route. */ 8216bd023f3SDavid S. Miller if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL) 822fd80eb94SDenis V. Lunev return -1; 8231da177e4SLinus Torvalds 824e6b4d113SWilliam Allen Simpson skb = tcp_make_synack(sk, dst, req, rvp); 8251da177e4SLinus Torvalds 8261da177e4SLinus Torvalds if (skb) { 827419f9f89SHerbert Xu __tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr); 8281da177e4SLinus Torvalds 8292e6599cbSArnaldo Carvalho de Melo err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr, 8302e6599cbSArnaldo Carvalho de Melo ireq->rmt_addr, 8312e6599cbSArnaldo Carvalho de Melo ireq->opt); 832b9df3cb8SGerrit Renker err = net_xmit_eval(err); 8331da177e4SLinus Torvalds } 8341da177e4SLinus Torvalds 8351da177e4SLinus Torvalds dst_release(dst); 8361da177e4SLinus Torvalds return err; 8371da177e4SLinus Torvalds } 8381da177e4SLinus Torvalds 83972659eccSOctavian Purdila static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req, 840e6b4d113SWilliam Allen Simpson struct request_values *rvp) 841fd80eb94SDenis V. Lunev { 84272659eccSOctavian Purdila TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); 84372659eccSOctavian Purdila return tcp_v4_send_synack(sk, NULL, req, rvp); 844fd80eb94SDenis V. Lunev } 845fd80eb94SDenis V. Lunev 8461da177e4SLinus Torvalds /* 84760236fddSArnaldo Carvalho de Melo * IPv4 request_sock destructor. 8481da177e4SLinus Torvalds */ 84960236fddSArnaldo Carvalho de Melo static void tcp_v4_reqsk_destructor(struct request_sock *req) 8501da177e4SLinus Torvalds { 8512e6599cbSArnaldo Carvalho de Melo kfree(inet_rsk(req)->opt); 8521da177e4SLinus Torvalds } 8531da177e4SLinus Torvalds 854946cedccSEric Dumazet /* 855946cedccSEric Dumazet * Return 1 if a syncookie should be sent 856946cedccSEric Dumazet */ 857946cedccSEric Dumazet int tcp_syn_flood_action(struct sock *sk, 858946cedccSEric Dumazet const struct sk_buff *skb, 859946cedccSEric Dumazet const char *proto) 8601da177e4SLinus Torvalds { 861946cedccSEric Dumazet const char *msg = "Dropping request"; 862946cedccSEric Dumazet int want_cookie = 0; 863946cedccSEric Dumazet struct listen_sock *lopt; 864946cedccSEric Dumazet 865946cedccSEric Dumazet 8661da177e4SLinus Torvalds 8672a1d4bd4SFlorian Westphal #ifdef CONFIG_SYN_COOKIES 868946cedccSEric Dumazet if (sysctl_tcp_syncookies) { 8692a1d4bd4SFlorian Westphal msg = "Sending cookies"; 870946cedccSEric Dumazet want_cookie = 1; 871946cedccSEric Dumazet NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES); 872946cedccSEric Dumazet } else 87380e40daaSArnaldo Carvalho de Melo #endif 874946cedccSEric Dumazet NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP); 8752a1d4bd4SFlorian Westphal 876946cedccSEric Dumazet lopt = inet_csk(sk)->icsk_accept_queue.listen_opt; 877946cedccSEric Dumazet if (!lopt->synflood_warned) { 878946cedccSEric Dumazet lopt->synflood_warned = 1; 879946cedccSEric Dumazet pr_info("%s: Possible SYN flooding on port %d. %s. " 880946cedccSEric Dumazet " Check SNMP counters.\n", 881946cedccSEric Dumazet proto, ntohs(tcp_hdr(skb)->dest), msg); 8822a1d4bd4SFlorian Westphal } 883946cedccSEric Dumazet return want_cookie; 884946cedccSEric Dumazet } 885946cedccSEric Dumazet EXPORT_SYMBOL(tcp_syn_flood_action); 8861da177e4SLinus Torvalds 8871da177e4SLinus Torvalds /* 88860236fddSArnaldo Carvalho de Melo * Save and compile IPv4 options into the request_sock if needed. 8891da177e4SLinus Torvalds */ 890f6d8bd05SEric Dumazet static struct ip_options_rcu *tcp_v4_save_options(struct sock *sk, 8911da177e4SLinus Torvalds struct sk_buff *skb) 8921da177e4SLinus Torvalds { 893f6d8bd05SEric Dumazet const struct ip_options *opt = &(IPCB(skb)->opt); 894f6d8bd05SEric Dumazet struct ip_options_rcu *dopt = NULL; 8951da177e4SLinus Torvalds 8961da177e4SLinus Torvalds if (opt && opt->optlen) { 897f6d8bd05SEric Dumazet int opt_size = sizeof(*dopt) + opt->optlen; 898f6d8bd05SEric Dumazet 8991da177e4SLinus Torvalds dopt = kmalloc(opt_size, GFP_ATOMIC); 9001da177e4SLinus Torvalds if (dopt) { 901f6d8bd05SEric Dumazet if (ip_options_echo(&dopt->opt, skb)) { 9021da177e4SLinus Torvalds kfree(dopt); 9031da177e4SLinus Torvalds dopt = NULL; 9041da177e4SLinus Torvalds } 9051da177e4SLinus Torvalds } 9061da177e4SLinus Torvalds } 9071da177e4SLinus Torvalds return dopt; 9081da177e4SLinus Torvalds } 9091da177e4SLinus Torvalds 910cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 911cfb6eeb4SYOSHIFUJI Hideaki /* 912cfb6eeb4SYOSHIFUJI Hideaki * RFC2385 MD5 checksumming requires a mapping of 913cfb6eeb4SYOSHIFUJI Hideaki * IP address->MD5 Key. 914cfb6eeb4SYOSHIFUJI Hideaki * We need to maintain these in the sk structure. 915cfb6eeb4SYOSHIFUJI Hideaki */ 916cfb6eeb4SYOSHIFUJI Hideaki 917cfb6eeb4SYOSHIFUJI Hideaki /* Find the Key structure for an address. */ 918a915da9bSEric Dumazet struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk, 919a915da9bSEric Dumazet const union tcp_md5_addr *addr, 920a915da9bSEric Dumazet int family) 921cfb6eeb4SYOSHIFUJI Hideaki { 922cfb6eeb4SYOSHIFUJI Hideaki struct tcp_sock *tp = tcp_sk(sk); 923a915da9bSEric Dumazet struct tcp_md5sig_key *key; 924a915da9bSEric Dumazet struct hlist_node *pos; 925a915da9bSEric Dumazet unsigned int size = sizeof(struct in_addr); 926a8afca03SEric Dumazet struct tcp_md5sig_info *md5sig; 927cfb6eeb4SYOSHIFUJI Hideaki 928a8afca03SEric Dumazet /* caller either holds rcu_read_lock() or socket lock */ 929a8afca03SEric Dumazet md5sig = rcu_dereference_check(tp->md5sig_info, 930*b4fb05eaSEric Dumazet sock_owned_by_user(sk) || 931*b4fb05eaSEric Dumazet lockdep_is_held(&sk->sk_lock.slock)); 932a8afca03SEric Dumazet if (!md5sig) 933cfb6eeb4SYOSHIFUJI Hideaki return NULL; 934a915da9bSEric Dumazet #if IS_ENABLED(CONFIG_IPV6) 935a915da9bSEric Dumazet if (family == AF_INET6) 936a915da9bSEric Dumazet size = sizeof(struct in6_addr); 937a915da9bSEric Dumazet #endif 938a8afca03SEric Dumazet hlist_for_each_entry_rcu(key, pos, &md5sig->head, node) { 939a915da9bSEric Dumazet if (key->family != family) 940a915da9bSEric Dumazet continue; 941a915da9bSEric Dumazet if (!memcmp(&key->addr, addr, size)) 942a915da9bSEric Dumazet return key; 943cfb6eeb4SYOSHIFUJI Hideaki } 944cfb6eeb4SYOSHIFUJI Hideaki return NULL; 945cfb6eeb4SYOSHIFUJI Hideaki } 946a915da9bSEric Dumazet EXPORT_SYMBOL(tcp_md5_do_lookup); 947cfb6eeb4SYOSHIFUJI Hideaki 948cfb6eeb4SYOSHIFUJI Hideaki struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk, 949cfb6eeb4SYOSHIFUJI Hideaki struct sock *addr_sk) 950cfb6eeb4SYOSHIFUJI Hideaki { 951a915da9bSEric Dumazet union tcp_md5_addr *addr; 952a915da9bSEric Dumazet 953a915da9bSEric Dumazet addr = (union tcp_md5_addr *)&inet_sk(addr_sk)->inet_daddr; 954a915da9bSEric Dumazet return tcp_md5_do_lookup(sk, addr, AF_INET); 955cfb6eeb4SYOSHIFUJI Hideaki } 956cfb6eeb4SYOSHIFUJI Hideaki EXPORT_SYMBOL(tcp_v4_md5_lookup); 957cfb6eeb4SYOSHIFUJI Hideaki 958f5b99bcdSAdrian Bunk static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk, 959cfb6eeb4SYOSHIFUJI Hideaki struct request_sock *req) 960cfb6eeb4SYOSHIFUJI Hideaki { 961a915da9bSEric Dumazet union tcp_md5_addr *addr; 962a915da9bSEric Dumazet 963a915da9bSEric Dumazet addr = (union tcp_md5_addr *)&inet_rsk(req)->rmt_addr; 964a915da9bSEric Dumazet return tcp_md5_do_lookup(sk, addr, AF_INET); 965cfb6eeb4SYOSHIFUJI Hideaki } 966cfb6eeb4SYOSHIFUJI Hideaki 967cfb6eeb4SYOSHIFUJI Hideaki /* This can be called on a newly created socket, from other files */ 968a915da9bSEric Dumazet int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, 969a915da9bSEric Dumazet int family, const u8 *newkey, u8 newkeylen, gfp_t gfp) 970cfb6eeb4SYOSHIFUJI Hideaki { 971cfb6eeb4SYOSHIFUJI Hideaki /* Add Key to the list */ 972b0a713e9SMatthias M. Dellweg struct tcp_md5sig_key *key; 973cfb6eeb4SYOSHIFUJI Hideaki struct tcp_sock *tp = tcp_sk(sk); 974f6685938SArnaldo Carvalho de Melo struct tcp_md5sig_info *md5sig; 975f6685938SArnaldo Carvalho de Melo 976a915da9bSEric Dumazet key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&addr, AF_INET); 977a915da9bSEric Dumazet if (key) { 978a915da9bSEric Dumazet /* Pre-existing entry - just update that one. */ 979a915da9bSEric Dumazet memcpy(key->key, newkey, newkeylen); 980a915da9bSEric Dumazet key->keylen = newkeylen; 981a915da9bSEric Dumazet return 0; 982cfb6eeb4SYOSHIFUJI Hideaki } 983260fcbebSYan, Zheng 984a8afca03SEric Dumazet md5sig = rcu_dereference_protected(tp->md5sig_info, 985a8afca03SEric Dumazet sock_owned_by_user(sk)); 986a915da9bSEric Dumazet if (!md5sig) { 987a915da9bSEric Dumazet md5sig = kmalloc(sizeof(*md5sig), gfp); 988a915da9bSEric Dumazet if (!md5sig) 989a915da9bSEric Dumazet return -ENOMEM; 990a915da9bSEric Dumazet 991a915da9bSEric Dumazet sk_nocaps_add(sk, NETIF_F_GSO_MASK); 992a915da9bSEric Dumazet INIT_HLIST_HEAD(&md5sig->head); 993a8afca03SEric Dumazet rcu_assign_pointer(tp->md5sig_info, md5sig); 994a915da9bSEric Dumazet } 995a915da9bSEric Dumazet 9965f3d9cb2SEric Dumazet key = sock_kmalloc(sk, sizeof(*key), gfp); 997a915da9bSEric Dumazet if (!key) 998a915da9bSEric Dumazet return -ENOMEM; 999a915da9bSEric Dumazet if (hlist_empty(&md5sig->head) && !tcp_alloc_md5sig_pool(sk)) { 10005f3d9cb2SEric Dumazet sock_kfree_s(sk, key, sizeof(*key)); 1001cfb6eeb4SYOSHIFUJI Hideaki return -ENOMEM; 1002cfb6eeb4SYOSHIFUJI Hideaki } 1003f6685938SArnaldo Carvalho de Melo 1004a915da9bSEric Dumazet memcpy(key->key, newkey, newkeylen); 1005a915da9bSEric Dumazet key->keylen = newkeylen; 1006a915da9bSEric Dumazet key->family = family; 1007a915da9bSEric Dumazet memcpy(&key->addr, addr, 1008a915da9bSEric Dumazet (family == AF_INET6) ? sizeof(struct in6_addr) : 1009a915da9bSEric Dumazet sizeof(struct in_addr)); 1010a915da9bSEric Dumazet hlist_add_head_rcu(&key->node, &md5sig->head); 1011cfb6eeb4SYOSHIFUJI Hideaki return 0; 1012cfb6eeb4SYOSHIFUJI Hideaki } 1013a915da9bSEric Dumazet EXPORT_SYMBOL(tcp_md5_do_add); 1014cfb6eeb4SYOSHIFUJI Hideaki 1015a915da9bSEric Dumazet int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family) 1016cfb6eeb4SYOSHIFUJI Hideaki { 1017cfb6eeb4SYOSHIFUJI Hideaki struct tcp_sock *tp = tcp_sk(sk); 1018a915da9bSEric Dumazet struct tcp_md5sig_key *key; 1019a8afca03SEric Dumazet struct tcp_md5sig_info *md5sig; 1020cfb6eeb4SYOSHIFUJI Hideaki 1021a915da9bSEric Dumazet key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&addr, AF_INET); 1022a915da9bSEric Dumazet if (!key) 1023cfb6eeb4SYOSHIFUJI Hideaki return -ENOENT; 1024a915da9bSEric Dumazet hlist_del_rcu(&key->node); 10255f3d9cb2SEric Dumazet atomic_sub(sizeof(*key), &sk->sk_omem_alloc); 1026a915da9bSEric Dumazet kfree_rcu(key, rcu); 1027a8afca03SEric Dumazet md5sig = rcu_dereference_protected(tp->md5sig_info, 1028a8afca03SEric Dumazet sock_owned_by_user(sk)); 1029a8afca03SEric Dumazet if (hlist_empty(&md5sig->head)) 1030a915da9bSEric Dumazet tcp_free_md5sig_pool(); 1031a915da9bSEric Dumazet return 0; 1032cfb6eeb4SYOSHIFUJI Hideaki } 1033a915da9bSEric Dumazet EXPORT_SYMBOL(tcp_md5_do_del); 1034cfb6eeb4SYOSHIFUJI Hideaki 1035a915da9bSEric Dumazet void tcp_clear_md5_list(struct sock *sk) 1036cfb6eeb4SYOSHIFUJI Hideaki { 1037cfb6eeb4SYOSHIFUJI Hideaki struct tcp_sock *tp = tcp_sk(sk); 1038a915da9bSEric Dumazet struct tcp_md5sig_key *key; 1039a915da9bSEric Dumazet struct hlist_node *pos, *n; 1040a8afca03SEric Dumazet struct tcp_md5sig_info *md5sig; 1041cfb6eeb4SYOSHIFUJI Hideaki 1042a8afca03SEric Dumazet md5sig = rcu_dereference_protected(tp->md5sig_info, 1); 1043a8afca03SEric Dumazet 1044a8afca03SEric Dumazet if (!hlist_empty(&md5sig->head)) 1045cfb6eeb4SYOSHIFUJI Hideaki tcp_free_md5sig_pool(); 1046a8afca03SEric Dumazet hlist_for_each_entry_safe(key, pos, n, &md5sig->head, node) { 1047a915da9bSEric Dumazet hlist_del_rcu(&key->node); 10485f3d9cb2SEric Dumazet atomic_sub(sizeof(*key), &sk->sk_omem_alloc); 1049a915da9bSEric Dumazet kfree_rcu(key, rcu); 1050cfb6eeb4SYOSHIFUJI Hideaki } 1051cfb6eeb4SYOSHIFUJI Hideaki } 1052cfb6eeb4SYOSHIFUJI Hideaki 1053cfb6eeb4SYOSHIFUJI Hideaki static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval, 1054cfb6eeb4SYOSHIFUJI Hideaki int optlen) 1055cfb6eeb4SYOSHIFUJI Hideaki { 1056cfb6eeb4SYOSHIFUJI Hideaki struct tcp_md5sig cmd; 1057cfb6eeb4SYOSHIFUJI Hideaki struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr; 1058cfb6eeb4SYOSHIFUJI Hideaki 1059cfb6eeb4SYOSHIFUJI Hideaki if (optlen < sizeof(cmd)) 1060cfb6eeb4SYOSHIFUJI Hideaki return -EINVAL; 1061cfb6eeb4SYOSHIFUJI Hideaki 1062cfb6eeb4SYOSHIFUJI Hideaki if (copy_from_user(&cmd, optval, sizeof(cmd))) 1063cfb6eeb4SYOSHIFUJI Hideaki return -EFAULT; 1064cfb6eeb4SYOSHIFUJI Hideaki 1065cfb6eeb4SYOSHIFUJI Hideaki if (sin->sin_family != AF_INET) 1066cfb6eeb4SYOSHIFUJI Hideaki return -EINVAL; 1067cfb6eeb4SYOSHIFUJI Hideaki 1068a8afca03SEric Dumazet if (!cmd.tcpm_key || !cmd.tcpm_keylen) 1069a915da9bSEric Dumazet return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr, 1070a915da9bSEric Dumazet AF_INET); 1071cfb6eeb4SYOSHIFUJI Hideaki 1072cfb6eeb4SYOSHIFUJI Hideaki if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN) 1073cfb6eeb4SYOSHIFUJI Hideaki return -EINVAL; 1074cfb6eeb4SYOSHIFUJI Hideaki 1075a915da9bSEric Dumazet return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr, 1076a915da9bSEric Dumazet AF_INET, cmd.tcpm_key, cmd.tcpm_keylen, 1077a915da9bSEric Dumazet GFP_KERNEL); 1078cfb6eeb4SYOSHIFUJI Hideaki } 1079cfb6eeb4SYOSHIFUJI Hideaki 108049a72dfbSAdam Langley static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp, 108149a72dfbSAdam Langley __be32 daddr, __be32 saddr, int nbytes) 1082cfb6eeb4SYOSHIFUJI Hideaki { 1083cfb6eeb4SYOSHIFUJI Hideaki struct tcp4_pseudohdr *bp; 108449a72dfbSAdam Langley struct scatterlist sg; 1085cfb6eeb4SYOSHIFUJI Hideaki 1086cfb6eeb4SYOSHIFUJI Hideaki bp = &hp->md5_blk.ip4; 1087cfb6eeb4SYOSHIFUJI Hideaki 1088cfb6eeb4SYOSHIFUJI Hideaki /* 108949a72dfbSAdam Langley * 1. the TCP pseudo-header (in the order: source IP address, 1090cfb6eeb4SYOSHIFUJI Hideaki * destination IP address, zero-padded protocol number, and 1091cfb6eeb4SYOSHIFUJI Hideaki * segment length) 1092cfb6eeb4SYOSHIFUJI Hideaki */ 1093cfb6eeb4SYOSHIFUJI Hideaki bp->saddr = saddr; 1094cfb6eeb4SYOSHIFUJI Hideaki bp->daddr = daddr; 1095cfb6eeb4SYOSHIFUJI Hideaki bp->pad = 0; 1096076fb722SYOSHIFUJI Hideaki bp->protocol = IPPROTO_TCP; 109749a72dfbSAdam Langley bp->len = cpu_to_be16(nbytes); 1098c7da57a1SDavid S. Miller 109949a72dfbSAdam Langley sg_init_one(&sg, bp, sizeof(*bp)); 110049a72dfbSAdam Langley return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp)); 110149a72dfbSAdam Langley } 110249a72dfbSAdam Langley 1103a915da9bSEric Dumazet static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, 1104318cf7aaSEric Dumazet __be32 daddr, __be32 saddr, const struct tcphdr *th) 110549a72dfbSAdam Langley { 110649a72dfbSAdam Langley struct tcp_md5sig_pool *hp; 110749a72dfbSAdam Langley struct hash_desc *desc; 110849a72dfbSAdam Langley 110949a72dfbSAdam Langley hp = tcp_get_md5sig_pool(); 111049a72dfbSAdam Langley if (!hp) 111149a72dfbSAdam Langley goto clear_hash_noput; 111249a72dfbSAdam Langley desc = &hp->md5_desc; 111349a72dfbSAdam Langley 111449a72dfbSAdam Langley if (crypto_hash_init(desc)) 111549a72dfbSAdam Langley goto clear_hash; 111649a72dfbSAdam Langley if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2)) 111749a72dfbSAdam Langley goto clear_hash; 111849a72dfbSAdam Langley if (tcp_md5_hash_header(hp, th)) 111949a72dfbSAdam Langley goto clear_hash; 112049a72dfbSAdam Langley if (tcp_md5_hash_key(hp, key)) 112149a72dfbSAdam Langley goto clear_hash; 112249a72dfbSAdam Langley if (crypto_hash_final(desc, md5_hash)) 1123cfb6eeb4SYOSHIFUJI Hideaki goto clear_hash; 1124cfb6eeb4SYOSHIFUJI Hideaki 1125cfb6eeb4SYOSHIFUJI Hideaki tcp_put_md5sig_pool(); 1126cfb6eeb4SYOSHIFUJI Hideaki return 0; 112749a72dfbSAdam Langley 1128cfb6eeb4SYOSHIFUJI Hideaki clear_hash: 1129cfb6eeb4SYOSHIFUJI Hideaki tcp_put_md5sig_pool(); 1130cfb6eeb4SYOSHIFUJI Hideaki clear_hash_noput: 1131cfb6eeb4SYOSHIFUJI Hideaki memset(md5_hash, 0, 16); 113249a72dfbSAdam Langley return 1; 1133cfb6eeb4SYOSHIFUJI Hideaki } 1134cfb6eeb4SYOSHIFUJI Hideaki 113549a72dfbSAdam Langley int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key, 1136318cf7aaSEric Dumazet const struct sock *sk, const struct request_sock *req, 1137318cf7aaSEric Dumazet const struct sk_buff *skb) 1138cfb6eeb4SYOSHIFUJI Hideaki { 113949a72dfbSAdam Langley struct tcp_md5sig_pool *hp; 114049a72dfbSAdam Langley struct hash_desc *desc; 1141318cf7aaSEric Dumazet const struct tcphdr *th = tcp_hdr(skb); 1142cfb6eeb4SYOSHIFUJI Hideaki __be32 saddr, daddr; 1143cfb6eeb4SYOSHIFUJI Hideaki 1144cfb6eeb4SYOSHIFUJI Hideaki if (sk) { 1145c720c7e8SEric Dumazet saddr = inet_sk(sk)->inet_saddr; 1146c720c7e8SEric Dumazet daddr = inet_sk(sk)->inet_daddr; 114749a72dfbSAdam Langley } else if (req) { 114849a72dfbSAdam Langley saddr = inet_rsk(req)->loc_addr; 114949a72dfbSAdam Langley daddr = inet_rsk(req)->rmt_addr; 1150cfb6eeb4SYOSHIFUJI Hideaki } else { 115149a72dfbSAdam Langley const struct iphdr *iph = ip_hdr(skb); 115249a72dfbSAdam Langley saddr = iph->saddr; 115349a72dfbSAdam Langley daddr = iph->daddr; 1154cfb6eeb4SYOSHIFUJI Hideaki } 1155cfb6eeb4SYOSHIFUJI Hideaki 115649a72dfbSAdam Langley hp = tcp_get_md5sig_pool(); 115749a72dfbSAdam Langley if (!hp) 115849a72dfbSAdam Langley goto clear_hash_noput; 115949a72dfbSAdam Langley desc = &hp->md5_desc; 116049a72dfbSAdam Langley 116149a72dfbSAdam Langley if (crypto_hash_init(desc)) 116249a72dfbSAdam Langley goto clear_hash; 116349a72dfbSAdam Langley 116449a72dfbSAdam Langley if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len)) 116549a72dfbSAdam Langley goto clear_hash; 116649a72dfbSAdam Langley if (tcp_md5_hash_header(hp, th)) 116749a72dfbSAdam Langley goto clear_hash; 116849a72dfbSAdam Langley if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2)) 116949a72dfbSAdam Langley goto clear_hash; 117049a72dfbSAdam Langley if (tcp_md5_hash_key(hp, key)) 117149a72dfbSAdam Langley goto clear_hash; 117249a72dfbSAdam Langley if (crypto_hash_final(desc, md5_hash)) 117349a72dfbSAdam Langley goto clear_hash; 117449a72dfbSAdam Langley 117549a72dfbSAdam Langley tcp_put_md5sig_pool(); 117649a72dfbSAdam Langley return 0; 117749a72dfbSAdam Langley 117849a72dfbSAdam Langley clear_hash: 117949a72dfbSAdam Langley tcp_put_md5sig_pool(); 118049a72dfbSAdam Langley clear_hash_noput: 118149a72dfbSAdam Langley memset(md5_hash, 0, 16); 118249a72dfbSAdam Langley return 1; 118349a72dfbSAdam Langley } 118449a72dfbSAdam Langley EXPORT_SYMBOL(tcp_v4_md5_hash_skb); 1185cfb6eeb4SYOSHIFUJI Hideaki 1186318cf7aaSEric Dumazet static int tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) 1187cfb6eeb4SYOSHIFUJI Hideaki { 1188cfb6eeb4SYOSHIFUJI Hideaki /* 1189cfb6eeb4SYOSHIFUJI Hideaki * This gets called for each TCP segment that arrives 1190cfb6eeb4SYOSHIFUJI Hideaki * so we want to be efficient. 1191cfb6eeb4SYOSHIFUJI Hideaki * We have 3 drop cases: 1192cfb6eeb4SYOSHIFUJI Hideaki * o No MD5 hash and one expected. 1193cfb6eeb4SYOSHIFUJI Hideaki * o MD5 hash and we're not expecting one. 1194cfb6eeb4SYOSHIFUJI Hideaki * o MD5 hash and its wrong. 1195cfb6eeb4SYOSHIFUJI Hideaki */ 1196cf533ea5SEric Dumazet const __u8 *hash_location = NULL; 1197cfb6eeb4SYOSHIFUJI Hideaki struct tcp_md5sig_key *hash_expected; 1198eddc9ec5SArnaldo Carvalho de Melo const struct iphdr *iph = ip_hdr(skb); 1199cf533ea5SEric Dumazet const struct tcphdr *th = tcp_hdr(skb); 1200cfb6eeb4SYOSHIFUJI Hideaki int genhash; 1201cfb6eeb4SYOSHIFUJI Hideaki unsigned char newhash[16]; 1202cfb6eeb4SYOSHIFUJI Hideaki 1203a915da9bSEric Dumazet hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr, 1204a915da9bSEric Dumazet AF_INET); 12057d5d5525SYOSHIFUJI Hideaki hash_location = tcp_parse_md5sig_option(th); 1206cfb6eeb4SYOSHIFUJI Hideaki 1207cfb6eeb4SYOSHIFUJI Hideaki /* We've parsed the options - do we have a hash? */ 1208cfb6eeb4SYOSHIFUJI Hideaki if (!hash_expected && !hash_location) 1209cfb6eeb4SYOSHIFUJI Hideaki return 0; 1210cfb6eeb4SYOSHIFUJI Hideaki 1211cfb6eeb4SYOSHIFUJI Hideaki if (hash_expected && !hash_location) { 1212785957d3SDavid S. Miller NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); 1213cfb6eeb4SYOSHIFUJI Hideaki return 1; 1214cfb6eeb4SYOSHIFUJI Hideaki } 1215cfb6eeb4SYOSHIFUJI Hideaki 1216cfb6eeb4SYOSHIFUJI Hideaki if (!hash_expected && hash_location) { 1217785957d3SDavid S. Miller NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED); 1218cfb6eeb4SYOSHIFUJI Hideaki return 1; 1219cfb6eeb4SYOSHIFUJI Hideaki } 1220cfb6eeb4SYOSHIFUJI Hideaki 1221cfb6eeb4SYOSHIFUJI Hideaki /* Okay, so this is hash_expected and hash_location - 1222cfb6eeb4SYOSHIFUJI Hideaki * so we need to calculate the checksum. 1223cfb6eeb4SYOSHIFUJI Hideaki */ 122449a72dfbSAdam Langley genhash = tcp_v4_md5_hash_skb(newhash, 1225cfb6eeb4SYOSHIFUJI Hideaki hash_expected, 122649a72dfbSAdam Langley NULL, NULL, skb); 1227cfb6eeb4SYOSHIFUJI Hideaki 1228cfb6eeb4SYOSHIFUJI Hideaki if (genhash || memcmp(hash_location, newhash, 16) != 0) { 1229cfb6eeb4SYOSHIFUJI Hideaki if (net_ratelimit()) { 1230673d57e7SHarvey Harrison printk(KERN_INFO "MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n", 1231673d57e7SHarvey Harrison &iph->saddr, ntohs(th->source), 1232673d57e7SHarvey Harrison &iph->daddr, ntohs(th->dest), 1233cfb6eeb4SYOSHIFUJI Hideaki genhash ? " tcp_v4_calc_md5_hash failed" : ""); 1234cfb6eeb4SYOSHIFUJI Hideaki } 1235cfb6eeb4SYOSHIFUJI Hideaki return 1; 1236cfb6eeb4SYOSHIFUJI Hideaki } 1237cfb6eeb4SYOSHIFUJI Hideaki return 0; 1238cfb6eeb4SYOSHIFUJI Hideaki } 1239cfb6eeb4SYOSHIFUJI Hideaki 1240cfb6eeb4SYOSHIFUJI Hideaki #endif 1241cfb6eeb4SYOSHIFUJI Hideaki 124272a3effaSEric Dumazet struct request_sock_ops tcp_request_sock_ops __read_mostly = { 12431da177e4SLinus Torvalds .family = PF_INET, 12442e6599cbSArnaldo Carvalho de Melo .obj_size = sizeof(struct tcp_request_sock), 124572659eccSOctavian Purdila .rtx_syn_ack = tcp_v4_rtx_synack, 124660236fddSArnaldo Carvalho de Melo .send_ack = tcp_v4_reqsk_send_ack, 124760236fddSArnaldo Carvalho de Melo .destructor = tcp_v4_reqsk_destructor, 12481da177e4SLinus Torvalds .send_reset = tcp_v4_send_reset, 124972659eccSOctavian Purdila .syn_ack_timeout = tcp_syn_ack_timeout, 12501da177e4SLinus Torvalds }; 12511da177e4SLinus Torvalds 1252cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 1253b2e4b3deSStephen Hemminger static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { 1254cfb6eeb4SYOSHIFUJI Hideaki .md5_lookup = tcp_v4_reqsk_md5_lookup, 1255e3afe7b7SJohn Dykstra .calc_md5_hash = tcp_v4_md5_hash_skb, 1256cfb6eeb4SYOSHIFUJI Hideaki }; 1257b6332e6cSAndrew Morton #endif 1258cfb6eeb4SYOSHIFUJI Hideaki 12591da177e4SLinus Torvalds int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) 12601da177e4SLinus Torvalds { 12614957faadSWilliam Allen Simpson struct tcp_extend_values tmp_ext; 12621da177e4SLinus Torvalds struct tcp_options_received tmp_opt; 1263cf533ea5SEric Dumazet const u8 *hash_location; 126460236fddSArnaldo Carvalho de Melo struct request_sock *req; 1265e6b4d113SWilliam Allen Simpson struct inet_request_sock *ireq; 12664957faadSWilliam Allen Simpson struct tcp_sock *tp = tcp_sk(sk); 1267e6b4d113SWilliam Allen Simpson struct dst_entry *dst = NULL; 1268eddc9ec5SArnaldo Carvalho de Melo __be32 saddr = ip_hdr(skb)->saddr; 1269eddc9ec5SArnaldo Carvalho de Melo __be32 daddr = ip_hdr(skb)->daddr; 12701da177e4SLinus Torvalds __u32 isn = TCP_SKB_CB(skb)->when; 12711da177e4SLinus Torvalds int want_cookie = 0; 12721da177e4SLinus Torvalds 12731da177e4SLinus Torvalds /* Never answer to SYNs send to broadcast or multicast */ 1274511c3f92SEric Dumazet if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) 12751da177e4SLinus Torvalds goto drop; 12761da177e4SLinus Torvalds 12771da177e4SLinus Torvalds /* TW buckets are converted to open requests without 12781da177e4SLinus Torvalds * limitations, they conserve resources and peer is 12791da177e4SLinus Torvalds * evidently real one. 12801da177e4SLinus Torvalds */ 1281463c84b9SArnaldo Carvalho de Melo if (inet_csk_reqsk_queue_is_full(sk) && !isn) { 1282946cedccSEric Dumazet want_cookie = tcp_syn_flood_action(sk, skb, "TCP"); 1283946cedccSEric Dumazet if (!want_cookie) 12841da177e4SLinus Torvalds goto drop; 12851da177e4SLinus Torvalds } 12861da177e4SLinus Torvalds 12871da177e4SLinus Torvalds /* Accept backlog is full. If we have already queued enough 12881da177e4SLinus Torvalds * of warm entries in syn queue, drop request. It is better than 12891da177e4SLinus Torvalds * clogging syn queue with openreqs with exponentially increasing 12901da177e4SLinus Torvalds * timeout. 12911da177e4SLinus Torvalds */ 1292463c84b9SArnaldo Carvalho de Melo if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) 12931da177e4SLinus Torvalds goto drop; 12941da177e4SLinus Torvalds 1295ce4a7d0dSArnaldo Carvalho de Melo req = inet_reqsk_alloc(&tcp_request_sock_ops); 12961da177e4SLinus Torvalds if (!req) 12971da177e4SLinus Torvalds goto drop; 12981da177e4SLinus Torvalds 1299cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 1300cfb6eeb4SYOSHIFUJI Hideaki tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops; 1301cfb6eeb4SYOSHIFUJI Hideaki #endif 1302cfb6eeb4SYOSHIFUJI Hideaki 13031da177e4SLinus Torvalds tcp_clear_options(&tmp_opt); 1304bee7ca9eSWilliam Allen Simpson tmp_opt.mss_clamp = TCP_MSS_DEFAULT; 13054957faadSWilliam Allen Simpson tmp_opt.user_mss = tp->rx_opt.user_mss; 1306bb5b7c11SDavid S. Miller tcp_parse_options(skb, &tmp_opt, &hash_location, 0); 13071da177e4SLinus Torvalds 13084957faadSWilliam Allen Simpson if (tmp_opt.cookie_plus > 0 && 13094957faadSWilliam Allen Simpson tmp_opt.saw_tstamp && 13104957faadSWilliam Allen Simpson !tp->rx_opt.cookie_out_never && 13114957faadSWilliam Allen Simpson (sysctl_tcp_cookie_size > 0 || 13124957faadSWilliam Allen Simpson (tp->cookie_values != NULL && 13134957faadSWilliam Allen Simpson tp->cookie_values->cookie_desired > 0))) { 13144957faadSWilliam Allen Simpson u8 *c; 13154957faadSWilliam Allen Simpson u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS]; 13164957faadSWilliam Allen Simpson int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE; 13174957faadSWilliam Allen Simpson 13184957faadSWilliam Allen Simpson if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0) 13194957faadSWilliam Allen Simpson goto drop_and_release; 13204957faadSWilliam Allen Simpson 13214957faadSWilliam Allen Simpson /* Secret recipe starts with IP addresses */ 13220eae88f3SEric Dumazet *mess++ ^= (__force u32)daddr; 13230eae88f3SEric Dumazet *mess++ ^= (__force u32)saddr; 13244957faadSWilliam Allen Simpson 13254957faadSWilliam Allen Simpson /* plus variable length Initiator Cookie */ 13264957faadSWilliam Allen Simpson c = (u8 *)mess; 13274957faadSWilliam Allen Simpson while (l-- > 0) 13284957faadSWilliam Allen Simpson *c++ ^= *hash_location++; 13294957faadSWilliam Allen Simpson 13304957faadSWilliam Allen Simpson want_cookie = 0; /* not our kind of cookie */ 13314957faadSWilliam Allen Simpson tmp_ext.cookie_out_never = 0; /* false */ 13324957faadSWilliam Allen Simpson tmp_ext.cookie_plus = tmp_opt.cookie_plus; 13334957faadSWilliam Allen Simpson } else if (!tp->rx_opt.cookie_in_always) { 13344957faadSWilliam Allen Simpson /* redundant indications, but ensure initialization. */ 13354957faadSWilliam Allen Simpson tmp_ext.cookie_out_never = 1; /* true */ 13364957faadSWilliam Allen Simpson tmp_ext.cookie_plus = 0; 13374957faadSWilliam Allen Simpson } else { 13384957faadSWilliam Allen Simpson goto drop_and_release; 13394957faadSWilliam Allen Simpson } 13404957faadSWilliam Allen Simpson tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always; 13411da177e4SLinus Torvalds 13424dfc2817SFlorian Westphal if (want_cookie && !tmp_opt.saw_tstamp) 13431da177e4SLinus Torvalds tcp_clear_options(&tmp_opt); 13441da177e4SLinus Torvalds 13451da177e4SLinus Torvalds tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; 13461da177e4SLinus Torvalds tcp_openreq_init(req, &tmp_opt, skb); 13471da177e4SLinus Torvalds 1348bb5b7c11SDavid S. Miller ireq = inet_rsk(req); 1349bb5b7c11SDavid S. Miller ireq->loc_addr = daddr; 1350bb5b7c11SDavid S. Miller ireq->rmt_addr = saddr; 1351bb5b7c11SDavid S. Miller ireq->no_srccheck = inet_sk(sk)->transparent; 1352bb5b7c11SDavid S. Miller ireq->opt = tcp_v4_save_options(sk, skb); 1353bb5b7c11SDavid S. Miller 1354284904aaSPaul Moore if (security_inet_conn_request(sk, skb, req)) 1355bb5b7c11SDavid S. Miller goto drop_and_free; 1356284904aaSPaul Moore 1357172d69e6SFlorian Westphal if (!want_cookie || tmp_opt.tstamp_ok) 1358aa8223c7SArnaldo Carvalho de Melo TCP_ECN_create_request(req, tcp_hdr(skb)); 13591da177e4SLinus Torvalds 13601da177e4SLinus Torvalds if (want_cookie) { 13611da177e4SLinus Torvalds isn = cookie_v4_init_sequence(sk, skb, &req->mss); 1362172d69e6SFlorian Westphal req->cookie_ts = tmp_opt.tstamp_ok; 13631da177e4SLinus Torvalds } else if (!isn) { 13641da177e4SLinus Torvalds struct inet_peer *peer = NULL; 13656bd023f3SDavid S. Miller struct flowi4 fl4; 13661da177e4SLinus Torvalds 13671da177e4SLinus Torvalds /* VJ's idea. We save last timestamp seen 13681da177e4SLinus Torvalds * from the destination in peer table, when entering 13691da177e4SLinus Torvalds * state TIME-WAIT, and check against it before 13701da177e4SLinus Torvalds * accepting new connection request. 13711da177e4SLinus Torvalds * 13721da177e4SLinus Torvalds * If "isn" is not zero, this request hit alive 13731da177e4SLinus Torvalds * timewait bucket, so that all the necessary checks 13741da177e4SLinus Torvalds * are made in the function processing timewait state. 13751da177e4SLinus Torvalds */ 13761da177e4SLinus Torvalds if (tmp_opt.saw_tstamp && 1377295ff7edSArnaldo Carvalho de Melo tcp_death_row.sysctl_tw_recycle && 13786bd023f3SDavid S. Miller (dst = inet_csk_route_req(sk, &fl4, req)) != NULL && 1379ed2361e6SDavid S. Miller fl4.daddr == saddr && 1380ed2361e6SDavid S. Miller (peer = rt_get_peer((struct rtable *)dst, fl4.daddr)) != NULL) { 1381317fe0e6SEric Dumazet inet_peer_refcheck(peer); 13822c1409a0SEric Dumazet if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL && 13831da177e4SLinus Torvalds (s32)(peer->tcp_ts - req->ts_recent) > 13841da177e4SLinus Torvalds TCP_PAWS_WINDOW) { 1385de0744afSPavel Emelyanov NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED); 13867cd04fa7SDenis V. Lunev goto drop_and_release; 13871da177e4SLinus Torvalds } 13881da177e4SLinus Torvalds } 13891da177e4SLinus Torvalds /* Kill the following clause, if you dislike this way. */ 13901da177e4SLinus Torvalds else if (!sysctl_tcp_syncookies && 1391463c84b9SArnaldo Carvalho de Melo (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < 13921da177e4SLinus Torvalds (sysctl_max_syn_backlog >> 2)) && 13931da177e4SLinus Torvalds (!peer || !peer->tcp_ts_stamp) && 13941da177e4SLinus Torvalds (!dst || !dst_metric(dst, RTAX_RTT))) { 13951da177e4SLinus Torvalds /* Without syncookies last quarter of 13961da177e4SLinus Torvalds * backlog is filled with destinations, 13971da177e4SLinus Torvalds * proven to be alive. 13981da177e4SLinus Torvalds * It means that we continue to communicate 13991da177e4SLinus Torvalds * to destinations, already remembered 14001da177e4SLinus Torvalds * to the moment of synflood. 14011da177e4SLinus Torvalds */ 1402673d57e7SHarvey Harrison LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI4/%u\n", 1403673d57e7SHarvey Harrison &saddr, ntohs(tcp_hdr(skb)->source)); 14047cd04fa7SDenis V. Lunev goto drop_and_release; 14051da177e4SLinus Torvalds } 14061da177e4SLinus Torvalds 1407a94f723dSGerrit Renker isn = tcp_v4_init_sequence(skb); 14081da177e4SLinus Torvalds } 14092e6599cbSArnaldo Carvalho de Melo tcp_rsk(req)->snt_isn = isn; 14109ad7c049SJerry Chu tcp_rsk(req)->snt_synack = tcp_time_stamp; 14111da177e4SLinus Torvalds 141272659eccSOctavian Purdila if (tcp_v4_send_synack(sk, dst, req, 14134957faadSWilliam Allen Simpson (struct request_values *)&tmp_ext) || 14144957faadSWilliam Allen Simpson want_cookie) 14151da177e4SLinus Torvalds goto drop_and_free; 14161da177e4SLinus Torvalds 14173f421baaSArnaldo Carvalho de Melo inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); 14181da177e4SLinus Torvalds return 0; 14191da177e4SLinus Torvalds 14207cd04fa7SDenis V. Lunev drop_and_release: 14217cd04fa7SDenis V. Lunev dst_release(dst); 14221da177e4SLinus Torvalds drop_and_free: 142360236fddSArnaldo Carvalho de Melo reqsk_free(req); 14241da177e4SLinus Torvalds drop: 14251da177e4SLinus Torvalds return 0; 14261da177e4SLinus Torvalds } 14274bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_conn_request); 14281da177e4SLinus Torvalds 14291da177e4SLinus Torvalds 14301da177e4SLinus Torvalds /* 14311da177e4SLinus Torvalds * The three way handshake has completed - we got a valid synack - 14321da177e4SLinus Torvalds * now create the new socket. 14331da177e4SLinus Torvalds */ 14341da177e4SLinus Torvalds struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, 143560236fddSArnaldo Carvalho de Melo struct request_sock *req, 14361da177e4SLinus Torvalds struct dst_entry *dst) 14371da177e4SLinus Torvalds { 14382e6599cbSArnaldo Carvalho de Melo struct inet_request_sock *ireq; 14391da177e4SLinus Torvalds struct inet_sock *newinet; 14401da177e4SLinus Torvalds struct tcp_sock *newtp; 14411da177e4SLinus Torvalds struct sock *newsk; 1442cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 1443cfb6eeb4SYOSHIFUJI Hideaki struct tcp_md5sig_key *key; 1444cfb6eeb4SYOSHIFUJI Hideaki #endif 1445f6d8bd05SEric Dumazet struct ip_options_rcu *inet_opt; 14461da177e4SLinus Torvalds 14471da177e4SLinus Torvalds if (sk_acceptq_is_full(sk)) 14481da177e4SLinus Torvalds goto exit_overflow; 14491da177e4SLinus Torvalds 14501da177e4SLinus Torvalds newsk = tcp_create_openreq_child(sk, req, skb); 14511da177e4SLinus Torvalds if (!newsk) 1452093d2823SBalazs Scheidler goto exit_nonewsk; 14531da177e4SLinus Torvalds 1454bcd76111SHerbert Xu newsk->sk_gso_type = SKB_GSO_TCPV4; 14551da177e4SLinus Torvalds 14561da177e4SLinus Torvalds newtp = tcp_sk(newsk); 14571da177e4SLinus Torvalds newinet = inet_sk(newsk); 14582e6599cbSArnaldo Carvalho de Melo ireq = inet_rsk(req); 1459c720c7e8SEric Dumazet newinet->inet_daddr = ireq->rmt_addr; 1460c720c7e8SEric Dumazet newinet->inet_rcv_saddr = ireq->loc_addr; 1461c720c7e8SEric Dumazet newinet->inet_saddr = ireq->loc_addr; 1462f6d8bd05SEric Dumazet inet_opt = ireq->opt; 1463f6d8bd05SEric Dumazet rcu_assign_pointer(newinet->inet_opt, inet_opt); 14642e6599cbSArnaldo Carvalho de Melo ireq->opt = NULL; 1465463c84b9SArnaldo Carvalho de Melo newinet->mc_index = inet_iif(skb); 1466eddc9ec5SArnaldo Carvalho de Melo newinet->mc_ttl = ip_hdr(skb)->ttl; 14674c507d28SJiri Benc newinet->rcv_tos = ip_hdr(skb)->tos; 1468d83d8461SArnaldo Carvalho de Melo inet_csk(newsk)->icsk_ext_hdr_len = 0; 1469f6d8bd05SEric Dumazet if (inet_opt) 1470f6d8bd05SEric Dumazet inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen; 1471c720c7e8SEric Dumazet newinet->inet_id = newtp->write_seq ^ jiffies; 14721da177e4SLinus Torvalds 14730e734419SDavid S. Miller if (!dst && (dst = inet_csk_route_child_sock(sk, newsk, req)) == NULL) 14740e734419SDavid S. Miller goto put_and_exit; 14750e734419SDavid S. Miller 14760e734419SDavid S. Miller sk_setup_caps(newsk, dst); 14770e734419SDavid S. Miller 14785d424d5aSJohn Heffner tcp_mtup_init(newsk); 14791da177e4SLinus Torvalds tcp_sync_mss(newsk, dst_mtu(dst)); 14800dbaee3bSDavid S. Miller newtp->advmss = dst_metric_advmss(dst); 1481f5fff5dcSTom Quetchenbach if (tcp_sk(sk)->rx_opt.user_mss && 1482f5fff5dcSTom Quetchenbach tcp_sk(sk)->rx_opt.user_mss < newtp->advmss) 1483f5fff5dcSTom Quetchenbach newtp->advmss = tcp_sk(sk)->rx_opt.user_mss; 1484f5fff5dcSTom Quetchenbach 14851da177e4SLinus Torvalds tcp_initialize_rcv_mss(newsk); 14869ad7c049SJerry Chu if (tcp_rsk(req)->snt_synack) 14879ad7c049SJerry Chu tcp_valid_rtt_meas(newsk, 14889ad7c049SJerry Chu tcp_time_stamp - tcp_rsk(req)->snt_synack); 14899ad7c049SJerry Chu newtp->total_retrans = req->retrans; 14901da177e4SLinus Torvalds 1491cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 1492cfb6eeb4SYOSHIFUJI Hideaki /* Copy over the MD5 key from the original socket */ 1493a915da9bSEric Dumazet key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr, 1494a915da9bSEric Dumazet AF_INET); 1495c720c7e8SEric Dumazet if (key != NULL) { 1496cfb6eeb4SYOSHIFUJI Hideaki /* 1497cfb6eeb4SYOSHIFUJI Hideaki * We're using one, so create a matching key 1498cfb6eeb4SYOSHIFUJI Hideaki * on the newsk structure. If we fail to get 1499cfb6eeb4SYOSHIFUJI Hideaki * memory, then we end up not copying the key 1500cfb6eeb4SYOSHIFUJI Hideaki * across. Shucks. 1501cfb6eeb4SYOSHIFUJI Hideaki */ 1502a915da9bSEric Dumazet tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr, 1503a915da9bSEric Dumazet AF_INET, key->key, key->keylen, GFP_ATOMIC); 1504a465419bSEric Dumazet sk_nocaps_add(newsk, NETIF_F_GSO_MASK); 1505cfb6eeb4SYOSHIFUJI Hideaki } 1506cfb6eeb4SYOSHIFUJI Hideaki #endif 1507cfb6eeb4SYOSHIFUJI Hideaki 15080e734419SDavid S. Miller if (__inet_inherit_port(sk, newsk) < 0) 15090e734419SDavid S. Miller goto put_and_exit; 15109327f705SEric Dumazet __inet_hash_nolisten(newsk, NULL); 15111da177e4SLinus Torvalds 15121da177e4SLinus Torvalds return newsk; 15131da177e4SLinus Torvalds 15141da177e4SLinus Torvalds exit_overflow: 1515de0744afSPavel Emelyanov NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); 1516093d2823SBalazs Scheidler exit_nonewsk: 1517093d2823SBalazs Scheidler dst_release(dst); 15181da177e4SLinus Torvalds exit: 1519de0744afSPavel Emelyanov NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); 15201da177e4SLinus Torvalds return NULL; 15210e734419SDavid S. Miller put_and_exit: 1522709e8697SEric Dumazet tcp_clear_xmit_timers(newsk); 1523d8a6e65fSEric Dumazet tcp_cleanup_congestion_control(newsk); 1524918eb399SEric Dumazet bh_unlock_sock(newsk); 15250e734419SDavid S. Miller sock_put(newsk); 15260e734419SDavid S. Miller goto exit; 15271da177e4SLinus Torvalds } 15284bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_syn_recv_sock); 15291da177e4SLinus Torvalds 15301da177e4SLinus Torvalds static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) 15311da177e4SLinus Torvalds { 1532aa8223c7SArnaldo Carvalho de Melo struct tcphdr *th = tcp_hdr(skb); 1533eddc9ec5SArnaldo Carvalho de Melo const struct iphdr *iph = ip_hdr(skb); 15341da177e4SLinus Torvalds struct sock *nsk; 153560236fddSArnaldo Carvalho de Melo struct request_sock **prev; 15361da177e4SLinus Torvalds /* Find possible connection requests. */ 1537463c84b9SArnaldo Carvalho de Melo struct request_sock *req = inet_csk_search_req(sk, &prev, th->source, 15381da177e4SLinus Torvalds iph->saddr, iph->daddr); 15391da177e4SLinus Torvalds if (req) 15401da177e4SLinus Torvalds return tcp_check_req(sk, skb, req, prev); 15411da177e4SLinus Torvalds 15423b1e0a65SYOSHIFUJI Hideaki nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr, 1543c67499c0SPavel Emelyanov th->source, iph->daddr, th->dest, inet_iif(skb)); 15441da177e4SLinus Torvalds 15451da177e4SLinus Torvalds if (nsk) { 15461da177e4SLinus Torvalds if (nsk->sk_state != TCP_TIME_WAIT) { 15471da177e4SLinus Torvalds bh_lock_sock(nsk); 15481da177e4SLinus Torvalds return nsk; 15491da177e4SLinus Torvalds } 15509469c7b4SYOSHIFUJI Hideaki inet_twsk_put(inet_twsk(nsk)); 15511da177e4SLinus Torvalds return NULL; 15521da177e4SLinus Torvalds } 15531da177e4SLinus Torvalds 15541da177e4SLinus Torvalds #ifdef CONFIG_SYN_COOKIES 1555af9b4738SFlorian Westphal if (!th->syn) 15561da177e4SLinus Torvalds sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt)); 15571da177e4SLinus Torvalds #endif 15581da177e4SLinus Torvalds return sk; 15591da177e4SLinus Torvalds } 15601da177e4SLinus Torvalds 1561b51655b9SAl Viro static __sum16 tcp_v4_checksum_init(struct sk_buff *skb) 15621da177e4SLinus Torvalds { 1563eddc9ec5SArnaldo Carvalho de Melo const struct iphdr *iph = ip_hdr(skb); 1564eddc9ec5SArnaldo Carvalho de Melo 156584fa7933SPatrick McHardy if (skb->ip_summed == CHECKSUM_COMPLETE) { 1566eddc9ec5SArnaldo Carvalho de Melo if (!tcp_v4_check(skb->len, iph->saddr, 1567eddc9ec5SArnaldo Carvalho de Melo iph->daddr, skb->csum)) { 15681da177e4SLinus Torvalds skb->ip_summed = CHECKSUM_UNNECESSARY; 1569fb286bb2SHerbert Xu return 0; 1570fb286bb2SHerbert Xu } 1571fb286bb2SHerbert Xu } 1572fb286bb2SHerbert Xu 1573eddc9ec5SArnaldo Carvalho de Melo skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, 1574fb286bb2SHerbert Xu skb->len, IPPROTO_TCP, 0); 1575fb286bb2SHerbert Xu 1576fb286bb2SHerbert Xu if (skb->len <= 76) { 1577fb286bb2SHerbert Xu return __skb_checksum_complete(skb); 15781da177e4SLinus Torvalds } 15791da177e4SLinus Torvalds return 0; 15801da177e4SLinus Torvalds } 15811da177e4SLinus Torvalds 15821da177e4SLinus Torvalds 15831da177e4SLinus Torvalds /* The socket must have it's spinlock held when we get 15841da177e4SLinus Torvalds * here. 15851da177e4SLinus Torvalds * 15861da177e4SLinus Torvalds * We have a potential double-lock case here, so even when 15871da177e4SLinus Torvalds * doing backlog processing we use the BH locking scheme. 15881da177e4SLinus Torvalds * This is because we cannot sleep with the original spinlock 15891da177e4SLinus Torvalds * held. 15901da177e4SLinus Torvalds */ 15911da177e4SLinus Torvalds int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) 15921da177e4SLinus Torvalds { 1593cfb6eeb4SYOSHIFUJI Hideaki struct sock *rsk; 1594cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 1595cfb6eeb4SYOSHIFUJI Hideaki /* 1596cfb6eeb4SYOSHIFUJI Hideaki * We really want to reject the packet as early as possible 1597cfb6eeb4SYOSHIFUJI Hideaki * if: 1598cfb6eeb4SYOSHIFUJI Hideaki * o We're expecting an MD5'd packet and this is no MD5 tcp option 1599cfb6eeb4SYOSHIFUJI Hideaki * o There is an MD5 option and we're not expecting one 1600cfb6eeb4SYOSHIFUJI Hideaki */ 1601cfb6eeb4SYOSHIFUJI Hideaki if (tcp_v4_inbound_md5_hash(sk, skb)) 1602cfb6eeb4SYOSHIFUJI Hideaki goto discard; 1603cfb6eeb4SYOSHIFUJI Hideaki #endif 1604cfb6eeb4SYOSHIFUJI Hideaki 16051da177e4SLinus Torvalds if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ 1606bdeab991STom Herbert sock_rps_save_rxhash(sk, skb); 1607aa8223c7SArnaldo Carvalho de Melo if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) { 1608cfb6eeb4SYOSHIFUJI Hideaki rsk = sk; 16091da177e4SLinus Torvalds goto reset; 1610cfb6eeb4SYOSHIFUJI Hideaki } 16111da177e4SLinus Torvalds return 0; 16121da177e4SLinus Torvalds } 16131da177e4SLinus Torvalds 1614ab6a5bb6SArnaldo Carvalho de Melo if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb)) 16151da177e4SLinus Torvalds goto csum_err; 16161da177e4SLinus Torvalds 16171da177e4SLinus Torvalds if (sk->sk_state == TCP_LISTEN) { 16181da177e4SLinus Torvalds struct sock *nsk = tcp_v4_hnd_req(sk, skb); 16191da177e4SLinus Torvalds if (!nsk) 16201da177e4SLinus Torvalds goto discard; 16211da177e4SLinus Torvalds 16221da177e4SLinus Torvalds if (nsk != sk) { 1623bdeab991STom Herbert sock_rps_save_rxhash(nsk, skb); 1624cfb6eeb4SYOSHIFUJI Hideaki if (tcp_child_process(sk, nsk, skb)) { 1625cfb6eeb4SYOSHIFUJI Hideaki rsk = nsk; 16261da177e4SLinus Torvalds goto reset; 1627cfb6eeb4SYOSHIFUJI Hideaki } 16281da177e4SLinus Torvalds return 0; 16291da177e4SLinus Torvalds } 1630ca55158cSEric Dumazet } else 1631bdeab991STom Herbert sock_rps_save_rxhash(sk, skb); 1632ca55158cSEric Dumazet 1633aa8223c7SArnaldo Carvalho de Melo if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) { 1634cfb6eeb4SYOSHIFUJI Hideaki rsk = sk; 16351da177e4SLinus Torvalds goto reset; 1636cfb6eeb4SYOSHIFUJI Hideaki } 16371da177e4SLinus Torvalds return 0; 16381da177e4SLinus Torvalds 16391da177e4SLinus Torvalds reset: 1640cfb6eeb4SYOSHIFUJI Hideaki tcp_v4_send_reset(rsk, skb); 16411da177e4SLinus Torvalds discard: 16421da177e4SLinus Torvalds kfree_skb(skb); 16431da177e4SLinus Torvalds /* Be careful here. If this function gets more complicated and 16441da177e4SLinus Torvalds * gcc suffers from register pressure on the x86, sk (in %ebx) 16451da177e4SLinus Torvalds * might be destroyed here. This current version compiles correctly, 16461da177e4SLinus Torvalds * but you have been warned. 16471da177e4SLinus Torvalds */ 16481da177e4SLinus Torvalds return 0; 16491da177e4SLinus Torvalds 16501da177e4SLinus Torvalds csum_err: 165163231bddSPavel Emelyanov TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); 16521da177e4SLinus Torvalds goto discard; 16531da177e4SLinus Torvalds } 16544bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_do_rcv); 16551da177e4SLinus Torvalds 16561da177e4SLinus Torvalds /* 16571da177e4SLinus Torvalds * From tcp_input.c 16581da177e4SLinus Torvalds */ 16591da177e4SLinus Torvalds 16601da177e4SLinus Torvalds int tcp_v4_rcv(struct sk_buff *skb) 16611da177e4SLinus Torvalds { 1662eddc9ec5SArnaldo Carvalho de Melo const struct iphdr *iph; 1663cf533ea5SEric Dumazet const struct tcphdr *th; 16641da177e4SLinus Torvalds struct sock *sk; 16651da177e4SLinus Torvalds int ret; 1666a86b1e30SPavel Emelyanov struct net *net = dev_net(skb->dev); 16671da177e4SLinus Torvalds 16681da177e4SLinus Torvalds if (skb->pkt_type != PACKET_HOST) 16691da177e4SLinus Torvalds goto discard_it; 16701da177e4SLinus Torvalds 16711da177e4SLinus Torvalds /* Count it even if it's bad */ 167263231bddSPavel Emelyanov TCP_INC_STATS_BH(net, TCP_MIB_INSEGS); 16731da177e4SLinus Torvalds 16741da177e4SLinus Torvalds if (!pskb_may_pull(skb, sizeof(struct tcphdr))) 16751da177e4SLinus Torvalds goto discard_it; 16761da177e4SLinus Torvalds 1677aa8223c7SArnaldo Carvalho de Melo th = tcp_hdr(skb); 16781da177e4SLinus Torvalds 16791da177e4SLinus Torvalds if (th->doff < sizeof(struct tcphdr) / 4) 16801da177e4SLinus Torvalds goto bad_packet; 16811da177e4SLinus Torvalds if (!pskb_may_pull(skb, th->doff * 4)) 16821da177e4SLinus Torvalds goto discard_it; 16831da177e4SLinus Torvalds 16841da177e4SLinus Torvalds /* An explanation is required here, I think. 16851da177e4SLinus Torvalds * Packet length and doff are validated by header prediction, 1686caa20d9aSStephen Hemminger * provided case of th->doff==0 is eliminated. 16871da177e4SLinus Torvalds * So, we defer the checks. */ 168860476372SHerbert Xu if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb)) 16891da177e4SLinus Torvalds goto bad_packet; 16901da177e4SLinus Torvalds 1691aa8223c7SArnaldo Carvalho de Melo th = tcp_hdr(skb); 1692eddc9ec5SArnaldo Carvalho de Melo iph = ip_hdr(skb); 16931da177e4SLinus Torvalds TCP_SKB_CB(skb)->seq = ntohl(th->seq); 16941da177e4SLinus Torvalds TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + 16951da177e4SLinus Torvalds skb->len - th->doff * 4); 16961da177e4SLinus Torvalds TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); 16971da177e4SLinus Torvalds TCP_SKB_CB(skb)->when = 0; 1698b82d1bb4SEric Dumazet TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph); 16991da177e4SLinus Torvalds TCP_SKB_CB(skb)->sacked = 0; 17001da177e4SLinus Torvalds 17019a1f27c4SArnaldo Carvalho de Melo sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest); 17021da177e4SLinus Torvalds if (!sk) 17031da177e4SLinus Torvalds goto no_tcp_socket; 17041da177e4SLinus Torvalds 1705bb134d5dSEric Dumazet process: 1706bb134d5dSEric Dumazet if (sk->sk_state == TCP_TIME_WAIT) 1707bb134d5dSEric Dumazet goto do_time_wait; 1708bb134d5dSEric Dumazet 17096cce09f8SEric Dumazet if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) { 17106cce09f8SEric Dumazet NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP); 1711d218d111SStephen Hemminger goto discard_and_relse; 17126cce09f8SEric Dumazet } 1713d218d111SStephen Hemminger 17141da177e4SLinus Torvalds if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) 17151da177e4SLinus Torvalds goto discard_and_relse; 1716b59c2701SPatrick McHardy nf_reset(skb); 17171da177e4SLinus Torvalds 1718fda9ef5dSDmitry Mishin if (sk_filter(sk, skb)) 17191da177e4SLinus Torvalds goto discard_and_relse; 17201da177e4SLinus Torvalds 17211da177e4SLinus Torvalds skb->dev = NULL; 17221da177e4SLinus Torvalds 1723c6366184SIngo Molnar bh_lock_sock_nested(sk); 17241da177e4SLinus Torvalds ret = 0; 17251da177e4SLinus Torvalds if (!sock_owned_by_user(sk)) { 17261a2449a8SChris Leech #ifdef CONFIG_NET_DMA 17271a2449a8SChris Leech struct tcp_sock *tp = tcp_sk(sk); 17281a2449a8SChris Leech if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list) 1729f67b4599SDan Williams tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY); 17301a2449a8SChris Leech if (tp->ucopy.dma_chan) 17311a2449a8SChris Leech ret = tcp_v4_do_rcv(sk, skb); 17321a2449a8SChris Leech else 17331a2449a8SChris Leech #endif 17341a2449a8SChris Leech { 17351da177e4SLinus Torvalds if (!tcp_prequeue(sk, skb)) 17361da177e4SLinus Torvalds ret = tcp_v4_do_rcv(sk, skb); 17371a2449a8SChris Leech } 17386cce09f8SEric Dumazet } else if (unlikely(sk_add_backlog(sk, skb))) { 17396b03a53aSZhu Yi bh_unlock_sock(sk); 17406cce09f8SEric Dumazet NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP); 17416b03a53aSZhu Yi goto discard_and_relse; 17426b03a53aSZhu Yi } 17431da177e4SLinus Torvalds bh_unlock_sock(sk); 17441da177e4SLinus Torvalds 17451da177e4SLinus Torvalds sock_put(sk); 17461da177e4SLinus Torvalds 17471da177e4SLinus Torvalds return ret; 17481da177e4SLinus Torvalds 17491da177e4SLinus Torvalds no_tcp_socket: 17501da177e4SLinus Torvalds if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) 17511da177e4SLinus Torvalds goto discard_it; 17521da177e4SLinus Torvalds 17531da177e4SLinus Torvalds if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) { 17541da177e4SLinus Torvalds bad_packet: 175563231bddSPavel Emelyanov TCP_INC_STATS_BH(net, TCP_MIB_INERRS); 17561da177e4SLinus Torvalds } else { 1757cfb6eeb4SYOSHIFUJI Hideaki tcp_v4_send_reset(NULL, skb); 17581da177e4SLinus Torvalds } 17591da177e4SLinus Torvalds 17601da177e4SLinus Torvalds discard_it: 17611da177e4SLinus Torvalds /* Discard frame. */ 17621da177e4SLinus Torvalds kfree_skb(skb); 17631da177e4SLinus Torvalds return 0; 17641da177e4SLinus Torvalds 17651da177e4SLinus Torvalds discard_and_relse: 17661da177e4SLinus Torvalds sock_put(sk); 17671da177e4SLinus Torvalds goto discard_it; 17681da177e4SLinus Torvalds 17691da177e4SLinus Torvalds do_time_wait: 17701da177e4SLinus Torvalds if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { 17719469c7b4SYOSHIFUJI Hideaki inet_twsk_put(inet_twsk(sk)); 17721da177e4SLinus Torvalds goto discard_it; 17731da177e4SLinus Torvalds } 17741da177e4SLinus Torvalds 17751da177e4SLinus Torvalds if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) { 177663231bddSPavel Emelyanov TCP_INC_STATS_BH(net, TCP_MIB_INERRS); 17779469c7b4SYOSHIFUJI Hideaki inet_twsk_put(inet_twsk(sk)); 17781da177e4SLinus Torvalds goto discard_it; 17791da177e4SLinus Torvalds } 17809469c7b4SYOSHIFUJI Hideaki switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { 17811da177e4SLinus Torvalds case TCP_TW_SYN: { 1782c346dca1SYOSHIFUJI Hideaki struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev), 1783c67499c0SPavel Emelyanov &tcp_hashinfo, 1784eddc9ec5SArnaldo Carvalho de Melo iph->daddr, th->dest, 1785463c84b9SArnaldo Carvalho de Melo inet_iif(skb)); 17861da177e4SLinus Torvalds if (sk2) { 17879469c7b4SYOSHIFUJI Hideaki inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row); 17889469c7b4SYOSHIFUJI Hideaki inet_twsk_put(inet_twsk(sk)); 17891da177e4SLinus Torvalds sk = sk2; 17901da177e4SLinus Torvalds goto process; 17911da177e4SLinus Torvalds } 17921da177e4SLinus Torvalds /* Fall through to ACK */ 17931da177e4SLinus Torvalds } 17941da177e4SLinus Torvalds case TCP_TW_ACK: 17951da177e4SLinus Torvalds tcp_v4_timewait_ack(sk, skb); 17961da177e4SLinus Torvalds break; 17971da177e4SLinus Torvalds case TCP_TW_RST: 17981da177e4SLinus Torvalds goto no_tcp_socket; 17991da177e4SLinus Torvalds case TCP_TW_SUCCESS:; 18001da177e4SLinus Torvalds } 18011da177e4SLinus Torvalds goto discard_it; 18021da177e4SLinus Torvalds } 18031da177e4SLinus Torvalds 18043f419d2dSDavid S. Miller struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it) 18051da177e4SLinus Torvalds { 18061da177e4SLinus Torvalds struct rtable *rt = (struct rtable *) __sk_dst_get(sk); 18073f419d2dSDavid S. Miller struct inet_sock *inet = inet_sk(sk); 18083f419d2dSDavid S. Miller struct inet_peer *peer; 18091da177e4SLinus Torvalds 1810c5216cc7SDavid S. Miller if (!rt || 1811c5216cc7SDavid S. Miller inet->cork.fl.u.ip4.daddr != inet->inet_daddr) { 1812b534ecf1SDavid S. Miller peer = inet_getpeer_v4(inet->inet_daddr, 1); 18133f419d2dSDavid S. Miller *release_it = true; 18141da177e4SLinus Torvalds } else { 18151da177e4SLinus Torvalds if (!rt->peer) 1816a48eff12SDavid S. Miller rt_bind_peer(rt, inet->inet_daddr, 1); 18171da177e4SLinus Torvalds peer = rt->peer; 18183f419d2dSDavid S. Miller *release_it = false; 18191da177e4SLinus Torvalds } 18201da177e4SLinus Torvalds 18213f419d2dSDavid S. Miller return peer; 18221da177e4SLinus Torvalds } 18233f419d2dSDavid S. Miller EXPORT_SYMBOL(tcp_v4_get_peer); 18241da177e4SLinus Torvalds 1825ccb7c410SDavid S. Miller void *tcp_v4_tw_get_peer(struct sock *sk) 18261da177e4SLinus Torvalds { 1827cf533ea5SEric Dumazet const struct inet_timewait_sock *tw = inet_twsk(sk); 18281da177e4SLinus Torvalds 1829ccb7c410SDavid S. Miller return inet_getpeer_v4(tw->tw_daddr, 1); 1830ccb7c410SDavid S. Miller } 1831ccb7c410SDavid S. Miller EXPORT_SYMBOL(tcp_v4_tw_get_peer); 18328feaf0c0SArnaldo Carvalho de Melo 1833ccb7c410SDavid S. Miller static struct timewait_sock_ops tcp_timewait_sock_ops = { 1834ccb7c410SDavid S. Miller .twsk_obj_size = sizeof(struct tcp_timewait_sock), 1835ccb7c410SDavid S. Miller .twsk_unique = tcp_twsk_unique, 1836ccb7c410SDavid S. Miller .twsk_destructor= tcp_twsk_destructor, 1837ccb7c410SDavid S. Miller .twsk_getpeer = tcp_v4_tw_get_peer, 1838ccb7c410SDavid S. Miller }; 18391da177e4SLinus Torvalds 18403b401a81SStephen Hemminger const struct inet_connection_sock_af_ops ipv4_specific = { 18411da177e4SLinus Torvalds .queue_xmit = ip_queue_xmit, 18421da177e4SLinus Torvalds .send_check = tcp_v4_send_check, 184332519f11SArnaldo Carvalho de Melo .rebuild_header = inet_sk_rebuild_header, 18441da177e4SLinus Torvalds .conn_request = tcp_v4_conn_request, 18451da177e4SLinus Torvalds .syn_recv_sock = tcp_v4_syn_recv_sock, 18463f419d2dSDavid S. Miller .get_peer = tcp_v4_get_peer, 18471da177e4SLinus Torvalds .net_header_len = sizeof(struct iphdr), 18481da177e4SLinus Torvalds .setsockopt = ip_setsockopt, 18491da177e4SLinus Torvalds .getsockopt = ip_getsockopt, 1850543d9cfeSArnaldo Carvalho de Melo .addr2sockaddr = inet_csk_addr2sockaddr, 1851543d9cfeSArnaldo Carvalho de Melo .sockaddr_len = sizeof(struct sockaddr_in), 1852ab1e0a13SArnaldo Carvalho de Melo .bind_conflict = inet_csk_bind_conflict, 18533fdadf7dSDmitry Mishin #ifdef CONFIG_COMPAT 18543fdadf7dSDmitry Mishin .compat_setsockopt = compat_ip_setsockopt, 18553fdadf7dSDmitry Mishin .compat_getsockopt = compat_ip_getsockopt, 18563fdadf7dSDmitry Mishin #endif 18571da177e4SLinus Torvalds }; 18584bc2f18bSEric Dumazet EXPORT_SYMBOL(ipv4_specific); 18591da177e4SLinus Torvalds 1860cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 1861b2e4b3deSStephen Hemminger static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = { 1862cfb6eeb4SYOSHIFUJI Hideaki .md5_lookup = tcp_v4_md5_lookup, 186349a72dfbSAdam Langley .calc_md5_hash = tcp_v4_md5_hash_skb, 1864cfb6eeb4SYOSHIFUJI Hideaki .md5_parse = tcp_v4_parse_md5_keys, 1865cfb6eeb4SYOSHIFUJI Hideaki }; 1866b6332e6cSAndrew Morton #endif 1867cfb6eeb4SYOSHIFUJI Hideaki 18681da177e4SLinus Torvalds /* NOTE: A lot of things set to zero explicitly by call to 18691da177e4SLinus Torvalds * sk_alloc() so need not be done here. 18701da177e4SLinus Torvalds */ 18711da177e4SLinus Torvalds static int tcp_v4_init_sock(struct sock *sk) 18721da177e4SLinus Torvalds { 18736687e988SArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(sk); 18741da177e4SLinus Torvalds struct tcp_sock *tp = tcp_sk(sk); 18751da177e4SLinus Torvalds 18761da177e4SLinus Torvalds skb_queue_head_init(&tp->out_of_order_queue); 18771da177e4SLinus Torvalds tcp_init_xmit_timers(sk); 18781da177e4SLinus Torvalds tcp_prequeue_init(tp); 18791da177e4SLinus Torvalds 18806687e988SArnaldo Carvalho de Melo icsk->icsk_rto = TCP_TIMEOUT_INIT; 18811da177e4SLinus Torvalds tp->mdev = TCP_TIMEOUT_INIT; 18821da177e4SLinus Torvalds 18831da177e4SLinus Torvalds /* So many TCP implementations out there (incorrectly) count the 18841da177e4SLinus Torvalds * initial SYN frame in their delayed-ACK and congestion control 18851da177e4SLinus Torvalds * algorithms that we must have the following bandaid to talk 18861da177e4SLinus Torvalds * efficiently to them. -DaveM 18871da177e4SLinus Torvalds */ 18889ad7c049SJerry Chu tp->snd_cwnd = TCP_INIT_CWND; 18891da177e4SLinus Torvalds 18901da177e4SLinus Torvalds /* See draft-stevens-tcpca-spec-01 for discussion of the 18911da177e4SLinus Torvalds * initialization of these values. 18921da177e4SLinus Torvalds */ 18930b6a05c1SIlpo Järvinen tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; 18941da177e4SLinus Torvalds tp->snd_cwnd_clamp = ~0; 1895bee7ca9eSWilliam Allen Simpson tp->mss_cache = TCP_MSS_DEFAULT; 18961da177e4SLinus Torvalds 18971da177e4SLinus Torvalds tp->reordering = sysctl_tcp_reordering; 18986687e988SArnaldo Carvalho de Melo icsk->icsk_ca_ops = &tcp_init_congestion_ops; 18991da177e4SLinus Torvalds 19001da177e4SLinus Torvalds sk->sk_state = TCP_CLOSE; 19011da177e4SLinus Torvalds 19021da177e4SLinus Torvalds sk->sk_write_space = sk_stream_write_space; 19031da177e4SLinus Torvalds sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); 19041da177e4SLinus Torvalds 19058292a17aSArnaldo Carvalho de Melo icsk->icsk_af_ops = &ipv4_specific; 1906d83d8461SArnaldo Carvalho de Melo icsk->icsk_sync_mss = tcp_sync_mss; 1907cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 1908cfb6eeb4SYOSHIFUJI Hideaki tp->af_specific = &tcp_sock_ipv4_specific; 1909cfb6eeb4SYOSHIFUJI Hideaki #endif 19101da177e4SLinus Torvalds 1911435cf559SWilliam Allen Simpson /* TCP Cookie Transactions */ 1912435cf559SWilliam Allen Simpson if (sysctl_tcp_cookie_size > 0) { 1913435cf559SWilliam Allen Simpson /* Default, cookies without s_data_payload. */ 1914435cf559SWilliam Allen Simpson tp->cookie_values = 1915435cf559SWilliam Allen Simpson kzalloc(sizeof(*tp->cookie_values), 1916435cf559SWilliam Allen Simpson sk->sk_allocation); 1917435cf559SWilliam Allen Simpson if (tp->cookie_values != NULL) 1918435cf559SWilliam Allen Simpson kref_init(&tp->cookie_values->kref); 1919435cf559SWilliam Allen Simpson } 1920435cf559SWilliam Allen Simpson /* Presumed zeroed, in order of appearance: 1921435cf559SWilliam Allen Simpson * cookie_in_always, cookie_out_never, 1922435cf559SWilliam Allen Simpson * s_data_constant, s_data_in, s_data_out 1923435cf559SWilliam Allen Simpson */ 19241da177e4SLinus Torvalds sk->sk_sndbuf = sysctl_tcp_wmem[1]; 19251da177e4SLinus Torvalds sk->sk_rcvbuf = sysctl_tcp_rmem[1]; 19261da177e4SLinus Torvalds 1927eb4dea58SHerbert Xu local_bh_disable(); 1928d1a4c0b3SGlauber Costa sock_update_memcg(sk); 1929180d8cd9SGlauber Costa sk_sockets_allocated_inc(sk); 1930eb4dea58SHerbert Xu local_bh_enable(); 19311da177e4SLinus Torvalds 19321da177e4SLinus Torvalds return 0; 19331da177e4SLinus Torvalds } 19341da177e4SLinus Torvalds 19357d06b2e0SBrian Haley void tcp_v4_destroy_sock(struct sock *sk) 19361da177e4SLinus Torvalds { 19371da177e4SLinus Torvalds struct tcp_sock *tp = tcp_sk(sk); 19381da177e4SLinus Torvalds 19391da177e4SLinus Torvalds tcp_clear_xmit_timers(sk); 19401da177e4SLinus Torvalds 19416687e988SArnaldo Carvalho de Melo tcp_cleanup_congestion_control(sk); 1942317a76f9SStephen Hemminger 19431da177e4SLinus Torvalds /* Cleanup up the write buffer. */ 1944fe067e8aSDavid S. Miller tcp_write_queue_purge(sk); 19451da177e4SLinus Torvalds 19461da177e4SLinus Torvalds /* Cleans up our, hopefully empty, out_of_order_queue. */ 19471da177e4SLinus Torvalds __skb_queue_purge(&tp->out_of_order_queue); 19481da177e4SLinus Torvalds 1949cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 1950cfb6eeb4SYOSHIFUJI Hideaki /* Clean up the MD5 key list, if any */ 1951cfb6eeb4SYOSHIFUJI Hideaki if (tp->md5sig_info) { 1952a915da9bSEric Dumazet tcp_clear_md5_list(sk); 1953a8afca03SEric Dumazet kfree_rcu(tp->md5sig_info, rcu); 1954cfb6eeb4SYOSHIFUJI Hideaki tp->md5sig_info = NULL; 1955cfb6eeb4SYOSHIFUJI Hideaki } 1956cfb6eeb4SYOSHIFUJI Hideaki #endif 1957cfb6eeb4SYOSHIFUJI Hideaki 19581a2449a8SChris Leech #ifdef CONFIG_NET_DMA 19591a2449a8SChris Leech /* Cleans up our sk_async_wait_queue */ 19601a2449a8SChris Leech __skb_queue_purge(&sk->sk_async_wait_queue); 19611a2449a8SChris Leech #endif 19621a2449a8SChris Leech 19631da177e4SLinus Torvalds /* Clean prequeue, it must be empty really */ 19641da177e4SLinus Torvalds __skb_queue_purge(&tp->ucopy.prequeue); 19651da177e4SLinus Torvalds 19661da177e4SLinus Torvalds /* Clean up a referenced TCP bind bucket. */ 1967463c84b9SArnaldo Carvalho de Melo if (inet_csk(sk)->icsk_bind_hash) 1968ab1e0a13SArnaldo Carvalho de Melo inet_put_port(sk); 19691da177e4SLinus Torvalds 19701da177e4SLinus Torvalds /* 19711da177e4SLinus Torvalds * If sendmsg cached page exists, toss it. 19721da177e4SLinus Torvalds */ 19731da177e4SLinus Torvalds if (sk->sk_sndmsg_page) { 19741da177e4SLinus Torvalds __free_page(sk->sk_sndmsg_page); 19751da177e4SLinus Torvalds sk->sk_sndmsg_page = NULL; 19761da177e4SLinus Torvalds } 19771da177e4SLinus Torvalds 1978435cf559SWilliam Allen Simpson /* TCP Cookie Transactions */ 1979435cf559SWilliam Allen Simpson if (tp->cookie_values != NULL) { 1980435cf559SWilliam Allen Simpson kref_put(&tp->cookie_values->kref, 1981435cf559SWilliam Allen Simpson tcp_cookie_values_release); 1982435cf559SWilliam Allen Simpson tp->cookie_values = NULL; 1983435cf559SWilliam Allen Simpson } 1984435cf559SWilliam Allen Simpson 1985180d8cd9SGlauber Costa sk_sockets_allocated_dec(sk); 1986d1a4c0b3SGlauber Costa sock_release_memcg(sk); 19871da177e4SLinus Torvalds } 19881da177e4SLinus Torvalds EXPORT_SYMBOL(tcp_v4_destroy_sock); 19891da177e4SLinus Torvalds 19901da177e4SLinus Torvalds #ifdef CONFIG_PROC_FS 19911da177e4SLinus Torvalds /* Proc filesystem TCP sock list dumping. */ 19921da177e4SLinus Torvalds 19933ab5aee7SEric Dumazet static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head) 19941da177e4SLinus Torvalds { 19953ab5aee7SEric Dumazet return hlist_nulls_empty(head) ? NULL : 19968feaf0c0SArnaldo Carvalho de Melo list_entry(head->first, struct inet_timewait_sock, tw_node); 19971da177e4SLinus Torvalds } 19981da177e4SLinus Torvalds 19998feaf0c0SArnaldo Carvalho de Melo static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw) 20001da177e4SLinus Torvalds { 20013ab5aee7SEric Dumazet return !is_a_nulls(tw->tw_node.next) ? 20023ab5aee7SEric Dumazet hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL; 20031da177e4SLinus Torvalds } 20041da177e4SLinus Torvalds 2005a8b690f9STom Herbert /* 2006a8b690f9STom Herbert * Get next listener socket follow cur. If cur is NULL, get first socket 2007a8b690f9STom Herbert * starting from bucket given in st->bucket; when st->bucket is zero the 2008a8b690f9STom Herbert * very first socket in the hash table is returned. 2009a8b690f9STom Herbert */ 20101da177e4SLinus Torvalds static void *listening_get_next(struct seq_file *seq, void *cur) 20111da177e4SLinus Torvalds { 2012463c84b9SArnaldo Carvalho de Melo struct inet_connection_sock *icsk; 2013c25eb3bfSEric Dumazet struct hlist_nulls_node *node; 20141da177e4SLinus Torvalds struct sock *sk = cur; 20155caea4eaSEric Dumazet struct inet_listen_hashbucket *ilb; 20161da177e4SLinus Torvalds struct tcp_iter_state *st = seq->private; 2017a4146b1bSDenis V. Lunev struct net *net = seq_file_net(seq); 20181da177e4SLinus Torvalds 20191da177e4SLinus Torvalds if (!sk) { 2020a8b690f9STom Herbert ilb = &tcp_hashinfo.listening_hash[st->bucket]; 20215caea4eaSEric Dumazet spin_lock_bh(&ilb->lock); 2022c25eb3bfSEric Dumazet sk = sk_nulls_head(&ilb->head); 2023a8b690f9STom Herbert st->offset = 0; 20241da177e4SLinus Torvalds goto get_sk; 20251da177e4SLinus Torvalds } 20265caea4eaSEric Dumazet ilb = &tcp_hashinfo.listening_hash[st->bucket]; 20271da177e4SLinus Torvalds ++st->num; 2028a8b690f9STom Herbert ++st->offset; 20291da177e4SLinus Torvalds 20301da177e4SLinus Torvalds if (st->state == TCP_SEQ_STATE_OPENREQ) { 203160236fddSArnaldo Carvalho de Melo struct request_sock *req = cur; 20321da177e4SLinus Torvalds 2033463c84b9SArnaldo Carvalho de Melo icsk = inet_csk(st->syn_wait_sk); 20341da177e4SLinus Torvalds req = req->dl_next; 20351da177e4SLinus Torvalds while (1) { 20361da177e4SLinus Torvalds while (req) { 2037bdccc4caSDaniel Lezcano if (req->rsk_ops->family == st->family) { 20381da177e4SLinus Torvalds cur = req; 20391da177e4SLinus Torvalds goto out; 20401da177e4SLinus Torvalds } 20411da177e4SLinus Torvalds req = req->dl_next; 20421da177e4SLinus Torvalds } 204372a3effaSEric Dumazet if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries) 20441da177e4SLinus Torvalds break; 20451da177e4SLinus Torvalds get_req: 2046463c84b9SArnaldo Carvalho de Melo req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket]; 20471da177e4SLinus Torvalds } 20481bde5ac4SEric Dumazet sk = sk_nulls_next(st->syn_wait_sk); 20491da177e4SLinus Torvalds st->state = TCP_SEQ_STATE_LISTENING; 2050463c84b9SArnaldo Carvalho de Melo read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); 20511da177e4SLinus Torvalds } else { 2052463c84b9SArnaldo Carvalho de Melo icsk = inet_csk(sk); 2053463c84b9SArnaldo Carvalho de Melo read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); 2054463c84b9SArnaldo Carvalho de Melo if (reqsk_queue_len(&icsk->icsk_accept_queue)) 20551da177e4SLinus Torvalds goto start_req; 2056463c84b9SArnaldo Carvalho de Melo read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); 20571bde5ac4SEric Dumazet sk = sk_nulls_next(sk); 20581da177e4SLinus Torvalds } 20591da177e4SLinus Torvalds get_sk: 2060c25eb3bfSEric Dumazet sk_nulls_for_each_from(sk, node) { 20618475ef9fSPavel Emelyanov if (!net_eq(sock_net(sk), net)) 20628475ef9fSPavel Emelyanov continue; 20638475ef9fSPavel Emelyanov if (sk->sk_family == st->family) { 20641da177e4SLinus Torvalds cur = sk; 20651da177e4SLinus Torvalds goto out; 20661da177e4SLinus Torvalds } 2067463c84b9SArnaldo Carvalho de Melo icsk = inet_csk(sk); 2068463c84b9SArnaldo Carvalho de Melo read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); 2069463c84b9SArnaldo Carvalho de Melo if (reqsk_queue_len(&icsk->icsk_accept_queue)) { 20701da177e4SLinus Torvalds start_req: 20711da177e4SLinus Torvalds st->uid = sock_i_uid(sk); 20721da177e4SLinus Torvalds st->syn_wait_sk = sk; 20731da177e4SLinus Torvalds st->state = TCP_SEQ_STATE_OPENREQ; 20741da177e4SLinus Torvalds st->sbucket = 0; 20751da177e4SLinus Torvalds goto get_req; 20761da177e4SLinus Torvalds } 2077463c84b9SArnaldo Carvalho de Melo read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); 20781da177e4SLinus Torvalds } 20795caea4eaSEric Dumazet spin_unlock_bh(&ilb->lock); 2080a8b690f9STom Herbert st->offset = 0; 20810f7ff927SArnaldo Carvalho de Melo if (++st->bucket < INET_LHTABLE_SIZE) { 20825caea4eaSEric Dumazet ilb = &tcp_hashinfo.listening_hash[st->bucket]; 20835caea4eaSEric Dumazet spin_lock_bh(&ilb->lock); 2084c25eb3bfSEric Dumazet sk = sk_nulls_head(&ilb->head); 20851da177e4SLinus Torvalds goto get_sk; 20861da177e4SLinus Torvalds } 20871da177e4SLinus Torvalds cur = NULL; 20881da177e4SLinus Torvalds out: 20891da177e4SLinus Torvalds return cur; 20901da177e4SLinus Torvalds } 20911da177e4SLinus Torvalds 20921da177e4SLinus Torvalds static void *listening_get_idx(struct seq_file *seq, loff_t *pos) 20931da177e4SLinus Torvalds { 2094a8b690f9STom Herbert struct tcp_iter_state *st = seq->private; 2095a8b690f9STom Herbert void *rc; 2096a8b690f9STom Herbert 2097a8b690f9STom Herbert st->bucket = 0; 2098a8b690f9STom Herbert st->offset = 0; 2099a8b690f9STom Herbert rc = listening_get_next(seq, NULL); 21001da177e4SLinus Torvalds 21011da177e4SLinus Torvalds while (rc && *pos) { 21021da177e4SLinus Torvalds rc = listening_get_next(seq, rc); 21031da177e4SLinus Torvalds --*pos; 21041da177e4SLinus Torvalds } 21051da177e4SLinus Torvalds return rc; 21061da177e4SLinus Torvalds } 21071da177e4SLinus Torvalds 21086eac5604SAndi Kleen static inline int empty_bucket(struct tcp_iter_state *st) 21096eac5604SAndi Kleen { 21103ab5aee7SEric Dumazet return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) && 21113ab5aee7SEric Dumazet hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain); 21126eac5604SAndi Kleen } 21136eac5604SAndi Kleen 2114a8b690f9STom Herbert /* 2115a8b690f9STom Herbert * Get first established socket starting from bucket given in st->bucket. 2116a8b690f9STom Herbert * If st->bucket is zero, the very first socket in the hash is returned. 2117a8b690f9STom Herbert */ 21181da177e4SLinus Torvalds static void *established_get_first(struct seq_file *seq) 21191da177e4SLinus Torvalds { 21201da177e4SLinus Torvalds struct tcp_iter_state *st = seq->private; 2121a4146b1bSDenis V. Lunev struct net *net = seq_file_net(seq); 21221da177e4SLinus Torvalds void *rc = NULL; 21231da177e4SLinus Torvalds 2124a8b690f9STom Herbert st->offset = 0; 2125a8b690f9STom Herbert for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) { 21261da177e4SLinus Torvalds struct sock *sk; 21273ab5aee7SEric Dumazet struct hlist_nulls_node *node; 21288feaf0c0SArnaldo Carvalho de Melo struct inet_timewait_sock *tw; 21299db66bdcSEric Dumazet spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket); 21301da177e4SLinus Torvalds 21316eac5604SAndi Kleen /* Lockless fast path for the common case of empty buckets */ 21326eac5604SAndi Kleen if (empty_bucket(st)) 21336eac5604SAndi Kleen continue; 21346eac5604SAndi Kleen 21359db66bdcSEric Dumazet spin_lock_bh(lock); 21363ab5aee7SEric Dumazet sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { 2137f40c8174SDaniel Lezcano if (sk->sk_family != st->family || 2138878628fbSYOSHIFUJI Hideaki !net_eq(sock_net(sk), net)) { 21391da177e4SLinus Torvalds continue; 21401da177e4SLinus Torvalds } 21411da177e4SLinus Torvalds rc = sk; 21421da177e4SLinus Torvalds goto out; 21431da177e4SLinus Torvalds } 21441da177e4SLinus Torvalds st->state = TCP_SEQ_STATE_TIME_WAIT; 21458feaf0c0SArnaldo Carvalho de Melo inet_twsk_for_each(tw, node, 2146dbca9b27SEric Dumazet &tcp_hashinfo.ehash[st->bucket].twchain) { 214728518fc1SPavel Emelyanov if (tw->tw_family != st->family || 2148878628fbSYOSHIFUJI Hideaki !net_eq(twsk_net(tw), net)) { 21491da177e4SLinus Torvalds continue; 21501da177e4SLinus Torvalds } 21511da177e4SLinus Torvalds rc = tw; 21521da177e4SLinus Torvalds goto out; 21531da177e4SLinus Torvalds } 21549db66bdcSEric Dumazet spin_unlock_bh(lock); 21551da177e4SLinus Torvalds st->state = TCP_SEQ_STATE_ESTABLISHED; 21561da177e4SLinus Torvalds } 21571da177e4SLinus Torvalds out: 21581da177e4SLinus Torvalds return rc; 21591da177e4SLinus Torvalds } 21601da177e4SLinus Torvalds 21611da177e4SLinus Torvalds static void *established_get_next(struct seq_file *seq, void *cur) 21621da177e4SLinus Torvalds { 21631da177e4SLinus Torvalds struct sock *sk = cur; 21648feaf0c0SArnaldo Carvalho de Melo struct inet_timewait_sock *tw; 21653ab5aee7SEric Dumazet struct hlist_nulls_node *node; 21661da177e4SLinus Torvalds struct tcp_iter_state *st = seq->private; 2167a4146b1bSDenis V. Lunev struct net *net = seq_file_net(seq); 21681da177e4SLinus Torvalds 21691da177e4SLinus Torvalds ++st->num; 2170a8b690f9STom Herbert ++st->offset; 21711da177e4SLinus Torvalds 21721da177e4SLinus Torvalds if (st->state == TCP_SEQ_STATE_TIME_WAIT) { 21731da177e4SLinus Torvalds tw = cur; 21741da177e4SLinus Torvalds tw = tw_next(tw); 21751da177e4SLinus Torvalds get_tw: 2176878628fbSYOSHIFUJI Hideaki while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) { 21771da177e4SLinus Torvalds tw = tw_next(tw); 21781da177e4SLinus Torvalds } 21791da177e4SLinus Torvalds if (tw) { 21801da177e4SLinus Torvalds cur = tw; 21811da177e4SLinus Torvalds goto out; 21821da177e4SLinus Torvalds } 21839db66bdcSEric Dumazet spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); 21841da177e4SLinus Torvalds st->state = TCP_SEQ_STATE_ESTABLISHED; 21851da177e4SLinus Torvalds 21866eac5604SAndi Kleen /* Look for next non empty bucket */ 2187a8b690f9STom Herbert st->offset = 0; 2188f373b53bSEric Dumazet while (++st->bucket <= tcp_hashinfo.ehash_mask && 21896eac5604SAndi Kleen empty_bucket(st)) 21906eac5604SAndi Kleen ; 2191f373b53bSEric Dumazet if (st->bucket > tcp_hashinfo.ehash_mask) 21926eac5604SAndi Kleen return NULL; 21936eac5604SAndi Kleen 21949db66bdcSEric Dumazet spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); 21953ab5aee7SEric Dumazet sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain); 21961da177e4SLinus Torvalds } else 21973ab5aee7SEric Dumazet sk = sk_nulls_next(sk); 21981da177e4SLinus Torvalds 21993ab5aee7SEric Dumazet sk_nulls_for_each_from(sk, node) { 2200878628fbSYOSHIFUJI Hideaki if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) 22011da177e4SLinus Torvalds goto found; 22021da177e4SLinus Torvalds } 22031da177e4SLinus Torvalds 22041da177e4SLinus Torvalds st->state = TCP_SEQ_STATE_TIME_WAIT; 2205dbca9b27SEric Dumazet tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain); 22061da177e4SLinus Torvalds goto get_tw; 22071da177e4SLinus Torvalds found: 22081da177e4SLinus Torvalds cur = sk; 22091da177e4SLinus Torvalds out: 22101da177e4SLinus Torvalds return cur; 22111da177e4SLinus Torvalds } 22121da177e4SLinus Torvalds 22131da177e4SLinus Torvalds static void *established_get_idx(struct seq_file *seq, loff_t pos) 22141da177e4SLinus Torvalds { 2215a8b690f9STom Herbert struct tcp_iter_state *st = seq->private; 2216a8b690f9STom Herbert void *rc; 2217a8b690f9STom Herbert 2218a8b690f9STom Herbert st->bucket = 0; 2219a8b690f9STom Herbert rc = established_get_first(seq); 22201da177e4SLinus Torvalds 22211da177e4SLinus Torvalds while (rc && pos) { 22221da177e4SLinus Torvalds rc = established_get_next(seq, rc); 22231da177e4SLinus Torvalds --pos; 22241da177e4SLinus Torvalds } 22251da177e4SLinus Torvalds return rc; 22261da177e4SLinus Torvalds } 22271da177e4SLinus Torvalds 22281da177e4SLinus Torvalds static void *tcp_get_idx(struct seq_file *seq, loff_t pos) 22291da177e4SLinus Torvalds { 22301da177e4SLinus Torvalds void *rc; 22311da177e4SLinus Torvalds struct tcp_iter_state *st = seq->private; 22321da177e4SLinus Torvalds 22331da177e4SLinus Torvalds st->state = TCP_SEQ_STATE_LISTENING; 22341da177e4SLinus Torvalds rc = listening_get_idx(seq, &pos); 22351da177e4SLinus Torvalds 22361da177e4SLinus Torvalds if (!rc) { 22371da177e4SLinus Torvalds st->state = TCP_SEQ_STATE_ESTABLISHED; 22381da177e4SLinus Torvalds rc = established_get_idx(seq, pos); 22391da177e4SLinus Torvalds } 22401da177e4SLinus Torvalds 22411da177e4SLinus Torvalds return rc; 22421da177e4SLinus Torvalds } 22431da177e4SLinus Torvalds 2244a8b690f9STom Herbert static void *tcp_seek_last_pos(struct seq_file *seq) 2245a8b690f9STom Herbert { 2246a8b690f9STom Herbert struct tcp_iter_state *st = seq->private; 2247a8b690f9STom Herbert int offset = st->offset; 2248a8b690f9STom Herbert int orig_num = st->num; 2249a8b690f9STom Herbert void *rc = NULL; 2250a8b690f9STom Herbert 2251a8b690f9STom Herbert switch (st->state) { 2252a8b690f9STom Herbert case TCP_SEQ_STATE_OPENREQ: 2253a8b690f9STom Herbert case TCP_SEQ_STATE_LISTENING: 2254a8b690f9STom Herbert if (st->bucket >= INET_LHTABLE_SIZE) 2255a8b690f9STom Herbert break; 2256a8b690f9STom Herbert st->state = TCP_SEQ_STATE_LISTENING; 2257a8b690f9STom Herbert rc = listening_get_next(seq, NULL); 2258a8b690f9STom Herbert while (offset-- && rc) 2259a8b690f9STom Herbert rc = listening_get_next(seq, rc); 2260a8b690f9STom Herbert if (rc) 2261a8b690f9STom Herbert break; 2262a8b690f9STom Herbert st->bucket = 0; 2263a8b690f9STom Herbert /* Fallthrough */ 2264a8b690f9STom Herbert case TCP_SEQ_STATE_ESTABLISHED: 2265a8b690f9STom Herbert case TCP_SEQ_STATE_TIME_WAIT: 2266a8b690f9STom Herbert st->state = TCP_SEQ_STATE_ESTABLISHED; 2267a8b690f9STom Herbert if (st->bucket > tcp_hashinfo.ehash_mask) 2268a8b690f9STom Herbert break; 2269a8b690f9STom Herbert rc = established_get_first(seq); 2270a8b690f9STom Herbert while (offset-- && rc) 2271a8b690f9STom Herbert rc = established_get_next(seq, rc); 2272a8b690f9STom Herbert } 2273a8b690f9STom Herbert 2274a8b690f9STom Herbert st->num = orig_num; 2275a8b690f9STom Herbert 2276a8b690f9STom Herbert return rc; 2277a8b690f9STom Herbert } 2278a8b690f9STom Herbert 22791da177e4SLinus Torvalds static void *tcp_seq_start(struct seq_file *seq, loff_t *pos) 22801da177e4SLinus Torvalds { 22811da177e4SLinus Torvalds struct tcp_iter_state *st = seq->private; 2282a8b690f9STom Herbert void *rc; 2283a8b690f9STom Herbert 2284a8b690f9STom Herbert if (*pos && *pos == st->last_pos) { 2285a8b690f9STom Herbert rc = tcp_seek_last_pos(seq); 2286a8b690f9STom Herbert if (rc) 2287a8b690f9STom Herbert goto out; 2288a8b690f9STom Herbert } 2289a8b690f9STom Herbert 22901da177e4SLinus Torvalds st->state = TCP_SEQ_STATE_LISTENING; 22911da177e4SLinus Torvalds st->num = 0; 2292a8b690f9STom Herbert st->bucket = 0; 2293a8b690f9STom Herbert st->offset = 0; 2294a8b690f9STom Herbert rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; 2295a8b690f9STom Herbert 2296a8b690f9STom Herbert out: 2297a8b690f9STom Herbert st->last_pos = *pos; 2298a8b690f9STom Herbert return rc; 22991da177e4SLinus Torvalds } 23001da177e4SLinus Torvalds 23011da177e4SLinus Torvalds static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) 23021da177e4SLinus Torvalds { 2303a8b690f9STom Herbert struct tcp_iter_state *st = seq->private; 23041da177e4SLinus Torvalds void *rc = NULL; 23051da177e4SLinus Torvalds 23061da177e4SLinus Torvalds if (v == SEQ_START_TOKEN) { 23071da177e4SLinus Torvalds rc = tcp_get_idx(seq, 0); 23081da177e4SLinus Torvalds goto out; 23091da177e4SLinus Torvalds } 23101da177e4SLinus Torvalds 23111da177e4SLinus Torvalds switch (st->state) { 23121da177e4SLinus Torvalds case TCP_SEQ_STATE_OPENREQ: 23131da177e4SLinus Torvalds case TCP_SEQ_STATE_LISTENING: 23141da177e4SLinus Torvalds rc = listening_get_next(seq, v); 23151da177e4SLinus Torvalds if (!rc) { 23161da177e4SLinus Torvalds st->state = TCP_SEQ_STATE_ESTABLISHED; 2317a8b690f9STom Herbert st->bucket = 0; 2318a8b690f9STom Herbert st->offset = 0; 23191da177e4SLinus Torvalds rc = established_get_first(seq); 23201da177e4SLinus Torvalds } 23211da177e4SLinus Torvalds break; 23221da177e4SLinus Torvalds case TCP_SEQ_STATE_ESTABLISHED: 23231da177e4SLinus Torvalds case TCP_SEQ_STATE_TIME_WAIT: 23241da177e4SLinus Torvalds rc = established_get_next(seq, v); 23251da177e4SLinus Torvalds break; 23261da177e4SLinus Torvalds } 23271da177e4SLinus Torvalds out: 23281da177e4SLinus Torvalds ++*pos; 2329a8b690f9STom Herbert st->last_pos = *pos; 23301da177e4SLinus Torvalds return rc; 23311da177e4SLinus Torvalds } 23321da177e4SLinus Torvalds 23331da177e4SLinus Torvalds static void tcp_seq_stop(struct seq_file *seq, void *v) 23341da177e4SLinus Torvalds { 23351da177e4SLinus Torvalds struct tcp_iter_state *st = seq->private; 23361da177e4SLinus Torvalds 23371da177e4SLinus Torvalds switch (st->state) { 23381da177e4SLinus Torvalds case TCP_SEQ_STATE_OPENREQ: 23391da177e4SLinus Torvalds if (v) { 2340463c84b9SArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk); 2341463c84b9SArnaldo Carvalho de Melo read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); 23421da177e4SLinus Torvalds } 23431da177e4SLinus Torvalds case TCP_SEQ_STATE_LISTENING: 23441da177e4SLinus Torvalds if (v != SEQ_START_TOKEN) 23455caea4eaSEric Dumazet spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock); 23461da177e4SLinus Torvalds break; 23471da177e4SLinus Torvalds case TCP_SEQ_STATE_TIME_WAIT: 23481da177e4SLinus Torvalds case TCP_SEQ_STATE_ESTABLISHED: 23491da177e4SLinus Torvalds if (v) 23509db66bdcSEric Dumazet spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); 23511da177e4SLinus Torvalds break; 23521da177e4SLinus Torvalds } 23531da177e4SLinus Torvalds } 23541da177e4SLinus Torvalds 235573cb88ecSArjan van de Ven int tcp_seq_open(struct inode *inode, struct file *file) 23561da177e4SLinus Torvalds { 23571da177e4SLinus Torvalds struct tcp_seq_afinfo *afinfo = PDE(inode)->data; 23581da177e4SLinus Torvalds struct tcp_iter_state *s; 235952d6f3f1SDenis V. Lunev int err; 23601da177e4SLinus Torvalds 236152d6f3f1SDenis V. Lunev err = seq_open_net(inode, file, &afinfo->seq_ops, 236252d6f3f1SDenis V. Lunev sizeof(struct tcp_iter_state)); 236352d6f3f1SDenis V. Lunev if (err < 0) 236452d6f3f1SDenis V. Lunev return err; 2365f40c8174SDaniel Lezcano 236652d6f3f1SDenis V. Lunev s = ((struct seq_file *)file->private_data)->private; 23671da177e4SLinus Torvalds s->family = afinfo->family; 2368a8b690f9STom Herbert s->last_pos = 0; 2369f40c8174SDaniel Lezcano return 0; 2370f40c8174SDaniel Lezcano } 237173cb88ecSArjan van de Ven EXPORT_SYMBOL(tcp_seq_open); 2372f40c8174SDaniel Lezcano 23736f8b13bcSDaniel Lezcano int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo) 23741da177e4SLinus Torvalds { 23751da177e4SLinus Torvalds int rc = 0; 23761da177e4SLinus Torvalds struct proc_dir_entry *p; 23771da177e4SLinus Torvalds 23789427c4b3SDenis V. Lunev afinfo->seq_ops.start = tcp_seq_start; 23799427c4b3SDenis V. Lunev afinfo->seq_ops.next = tcp_seq_next; 23809427c4b3SDenis V. Lunev afinfo->seq_ops.stop = tcp_seq_stop; 23819427c4b3SDenis V. Lunev 238284841c3cSDenis V. Lunev p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net, 238373cb88ecSArjan van de Ven afinfo->seq_fops, afinfo); 238484841c3cSDenis V. Lunev if (!p) 23851da177e4SLinus Torvalds rc = -ENOMEM; 23861da177e4SLinus Torvalds return rc; 23871da177e4SLinus Torvalds } 23884bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_proc_register); 23891da177e4SLinus Torvalds 23906f8b13bcSDaniel Lezcano void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo) 23911da177e4SLinus Torvalds { 23926f8b13bcSDaniel Lezcano proc_net_remove(net, afinfo->name); 23931da177e4SLinus Torvalds } 23944bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_proc_unregister); 23951da177e4SLinus Torvalds 2396cf533ea5SEric Dumazet static void get_openreq4(const struct sock *sk, const struct request_sock *req, 23975e659e4cSPavel Emelyanov struct seq_file *f, int i, int uid, int *len) 23981da177e4SLinus Torvalds { 23992e6599cbSArnaldo Carvalho de Melo const struct inet_request_sock *ireq = inet_rsk(req); 24001da177e4SLinus Torvalds int ttd = req->expires - jiffies; 24011da177e4SLinus Torvalds 24025e659e4cSPavel Emelyanov seq_printf(f, "%4d: %08X:%04X %08X:%04X" 240371338aa7SDan Rosenberg " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %pK%n", 24041da177e4SLinus Torvalds i, 24052e6599cbSArnaldo Carvalho de Melo ireq->loc_addr, 2406c720c7e8SEric Dumazet ntohs(inet_sk(sk)->inet_sport), 24072e6599cbSArnaldo Carvalho de Melo ireq->rmt_addr, 24082e6599cbSArnaldo Carvalho de Melo ntohs(ireq->rmt_port), 24091da177e4SLinus Torvalds TCP_SYN_RECV, 24101da177e4SLinus Torvalds 0, 0, /* could print option size, but that is af dependent. */ 24111da177e4SLinus Torvalds 1, /* timers active (only the expire timer) */ 24121da177e4SLinus Torvalds jiffies_to_clock_t(ttd), 24131da177e4SLinus Torvalds req->retrans, 24141da177e4SLinus Torvalds uid, 24151da177e4SLinus Torvalds 0, /* non standard timer */ 24161da177e4SLinus Torvalds 0, /* open_requests have no inode */ 24171da177e4SLinus Torvalds atomic_read(&sk->sk_refcnt), 24185e659e4cSPavel Emelyanov req, 24195e659e4cSPavel Emelyanov len); 24201da177e4SLinus Torvalds } 24211da177e4SLinus Torvalds 24225e659e4cSPavel Emelyanov static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len) 24231da177e4SLinus Torvalds { 24241da177e4SLinus Torvalds int timer_active; 24251da177e4SLinus Torvalds unsigned long timer_expires; 2426cf533ea5SEric Dumazet const struct tcp_sock *tp = tcp_sk(sk); 2427cf4c6bf8SIlpo Järvinen const struct inet_connection_sock *icsk = inet_csk(sk); 2428cf533ea5SEric Dumazet const struct inet_sock *inet = inet_sk(sk); 2429c720c7e8SEric Dumazet __be32 dest = inet->inet_daddr; 2430c720c7e8SEric Dumazet __be32 src = inet->inet_rcv_saddr; 2431c720c7e8SEric Dumazet __u16 destp = ntohs(inet->inet_dport); 2432c720c7e8SEric Dumazet __u16 srcp = ntohs(inet->inet_sport); 243349d09007SEric Dumazet int rx_queue; 24341da177e4SLinus Torvalds 2435463c84b9SArnaldo Carvalho de Melo if (icsk->icsk_pending == ICSK_TIME_RETRANS) { 24361da177e4SLinus Torvalds timer_active = 1; 2437463c84b9SArnaldo Carvalho de Melo timer_expires = icsk->icsk_timeout; 2438463c84b9SArnaldo Carvalho de Melo } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { 24391da177e4SLinus Torvalds timer_active = 4; 2440463c84b9SArnaldo Carvalho de Melo timer_expires = icsk->icsk_timeout; 2441cf4c6bf8SIlpo Järvinen } else if (timer_pending(&sk->sk_timer)) { 24421da177e4SLinus Torvalds timer_active = 2; 2443cf4c6bf8SIlpo Järvinen timer_expires = sk->sk_timer.expires; 24441da177e4SLinus Torvalds } else { 24451da177e4SLinus Torvalds timer_active = 0; 24461da177e4SLinus Torvalds timer_expires = jiffies; 24471da177e4SLinus Torvalds } 24481da177e4SLinus Torvalds 244949d09007SEric Dumazet if (sk->sk_state == TCP_LISTEN) 245049d09007SEric Dumazet rx_queue = sk->sk_ack_backlog; 245149d09007SEric Dumazet else 245249d09007SEric Dumazet /* 245349d09007SEric Dumazet * because we dont lock socket, we might find a transient negative value 245449d09007SEric Dumazet */ 245549d09007SEric Dumazet rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0); 245649d09007SEric Dumazet 24575e659e4cSPavel Emelyanov seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX " 245871338aa7SDan Rosenberg "%08X %5d %8d %lu %d %pK %lu %lu %u %u %d%n", 2459cf4c6bf8SIlpo Järvinen i, src, srcp, dest, destp, sk->sk_state, 246047da8ee6SSridhar Samudrala tp->write_seq - tp->snd_una, 246149d09007SEric Dumazet rx_queue, 24621da177e4SLinus Torvalds timer_active, 24631da177e4SLinus Torvalds jiffies_to_clock_t(timer_expires - jiffies), 2464463c84b9SArnaldo Carvalho de Melo icsk->icsk_retransmits, 2465cf4c6bf8SIlpo Järvinen sock_i_uid(sk), 24666687e988SArnaldo Carvalho de Melo icsk->icsk_probes_out, 2467cf4c6bf8SIlpo Järvinen sock_i_ino(sk), 2468cf4c6bf8SIlpo Järvinen atomic_read(&sk->sk_refcnt), sk, 24697be87351SStephen Hemminger jiffies_to_clock_t(icsk->icsk_rto), 24707be87351SStephen Hemminger jiffies_to_clock_t(icsk->icsk_ack.ato), 2471463c84b9SArnaldo Carvalho de Melo (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, 24721da177e4SLinus Torvalds tp->snd_cwnd, 24730b6a05c1SIlpo Järvinen tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh, 24745e659e4cSPavel Emelyanov len); 24751da177e4SLinus Torvalds } 24761da177e4SLinus Torvalds 2477cf533ea5SEric Dumazet static void get_timewait4_sock(const struct inet_timewait_sock *tw, 24785e659e4cSPavel Emelyanov struct seq_file *f, int i, int *len) 24791da177e4SLinus Torvalds { 248023f33c2dSAl Viro __be32 dest, src; 24811da177e4SLinus Torvalds __u16 destp, srcp; 24821da177e4SLinus Torvalds int ttd = tw->tw_ttd - jiffies; 24831da177e4SLinus Torvalds 24841da177e4SLinus Torvalds if (ttd < 0) 24851da177e4SLinus Torvalds ttd = 0; 24861da177e4SLinus Torvalds 24871da177e4SLinus Torvalds dest = tw->tw_daddr; 24881da177e4SLinus Torvalds src = tw->tw_rcv_saddr; 24891da177e4SLinus Torvalds destp = ntohs(tw->tw_dport); 24901da177e4SLinus Torvalds srcp = ntohs(tw->tw_sport); 24911da177e4SLinus Torvalds 24925e659e4cSPavel Emelyanov seq_printf(f, "%4d: %08X:%04X %08X:%04X" 249371338aa7SDan Rosenberg " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK%n", 24941da177e4SLinus Torvalds i, src, srcp, dest, destp, tw->tw_substate, 0, 0, 24951da177e4SLinus Torvalds 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0, 24965e659e4cSPavel Emelyanov atomic_read(&tw->tw_refcnt), tw, len); 24971da177e4SLinus Torvalds } 24981da177e4SLinus Torvalds 24991da177e4SLinus Torvalds #define TMPSZ 150 25001da177e4SLinus Torvalds 25011da177e4SLinus Torvalds static int tcp4_seq_show(struct seq_file *seq, void *v) 25021da177e4SLinus Torvalds { 25031da177e4SLinus Torvalds struct tcp_iter_state *st; 25045e659e4cSPavel Emelyanov int len; 25051da177e4SLinus Torvalds 25061da177e4SLinus Torvalds if (v == SEQ_START_TOKEN) { 25071da177e4SLinus Torvalds seq_printf(seq, "%-*s\n", TMPSZ - 1, 25081da177e4SLinus Torvalds " sl local_address rem_address st tx_queue " 25091da177e4SLinus Torvalds "rx_queue tr tm->when retrnsmt uid timeout " 25101da177e4SLinus Torvalds "inode"); 25111da177e4SLinus Torvalds goto out; 25121da177e4SLinus Torvalds } 25131da177e4SLinus Torvalds st = seq->private; 25141da177e4SLinus Torvalds 25151da177e4SLinus Torvalds switch (st->state) { 25161da177e4SLinus Torvalds case TCP_SEQ_STATE_LISTENING: 25171da177e4SLinus Torvalds case TCP_SEQ_STATE_ESTABLISHED: 25185e659e4cSPavel Emelyanov get_tcp4_sock(v, seq, st->num, &len); 25191da177e4SLinus Torvalds break; 25201da177e4SLinus Torvalds case TCP_SEQ_STATE_OPENREQ: 25215e659e4cSPavel Emelyanov get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len); 25221da177e4SLinus Torvalds break; 25231da177e4SLinus Torvalds case TCP_SEQ_STATE_TIME_WAIT: 25245e659e4cSPavel Emelyanov get_timewait4_sock(v, seq, st->num, &len); 25251da177e4SLinus Torvalds break; 25261da177e4SLinus Torvalds } 25275e659e4cSPavel Emelyanov seq_printf(seq, "%*s\n", TMPSZ - 1 - len, ""); 25281da177e4SLinus Torvalds out: 25291da177e4SLinus Torvalds return 0; 25301da177e4SLinus Torvalds } 25311da177e4SLinus Torvalds 253273cb88ecSArjan van de Ven static const struct file_operations tcp_afinfo_seq_fops = { 253373cb88ecSArjan van de Ven .owner = THIS_MODULE, 253473cb88ecSArjan van de Ven .open = tcp_seq_open, 253573cb88ecSArjan van de Ven .read = seq_read, 253673cb88ecSArjan van de Ven .llseek = seq_lseek, 253773cb88ecSArjan van de Ven .release = seq_release_net 253873cb88ecSArjan van de Ven }; 253973cb88ecSArjan van de Ven 25401da177e4SLinus Torvalds static struct tcp_seq_afinfo tcp4_seq_afinfo = { 25411da177e4SLinus Torvalds .name = "tcp", 25421da177e4SLinus Torvalds .family = AF_INET, 254373cb88ecSArjan van de Ven .seq_fops = &tcp_afinfo_seq_fops, 25449427c4b3SDenis V. Lunev .seq_ops = { 25459427c4b3SDenis V. Lunev .show = tcp4_seq_show, 25469427c4b3SDenis V. Lunev }, 25471da177e4SLinus Torvalds }; 25481da177e4SLinus Torvalds 25492c8c1e72SAlexey Dobriyan static int __net_init tcp4_proc_init_net(struct net *net) 2550757764f6SPavel Emelyanov { 2551757764f6SPavel Emelyanov return tcp_proc_register(net, &tcp4_seq_afinfo); 2552757764f6SPavel Emelyanov } 2553757764f6SPavel Emelyanov 25542c8c1e72SAlexey Dobriyan static void __net_exit tcp4_proc_exit_net(struct net *net) 2555757764f6SPavel Emelyanov { 2556757764f6SPavel Emelyanov tcp_proc_unregister(net, &tcp4_seq_afinfo); 2557757764f6SPavel Emelyanov } 2558757764f6SPavel Emelyanov 2559757764f6SPavel Emelyanov static struct pernet_operations tcp4_net_ops = { 2560757764f6SPavel Emelyanov .init = tcp4_proc_init_net, 2561757764f6SPavel Emelyanov .exit = tcp4_proc_exit_net, 2562757764f6SPavel Emelyanov }; 2563757764f6SPavel Emelyanov 25641da177e4SLinus Torvalds int __init tcp4_proc_init(void) 25651da177e4SLinus Torvalds { 2566757764f6SPavel Emelyanov return register_pernet_subsys(&tcp4_net_ops); 25671da177e4SLinus Torvalds } 25681da177e4SLinus Torvalds 25691da177e4SLinus Torvalds void tcp4_proc_exit(void) 25701da177e4SLinus Torvalds { 2571757764f6SPavel Emelyanov unregister_pernet_subsys(&tcp4_net_ops); 25721da177e4SLinus Torvalds } 25731da177e4SLinus Torvalds #endif /* CONFIG_PROC_FS */ 25741da177e4SLinus Torvalds 2575bf296b12SHerbert Xu struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb) 2576bf296b12SHerbert Xu { 2577b71d1d42SEric Dumazet const struct iphdr *iph = skb_gro_network_header(skb); 2578bf296b12SHerbert Xu 2579bf296b12SHerbert Xu switch (skb->ip_summed) { 2580bf296b12SHerbert Xu case CHECKSUM_COMPLETE: 258186911732SHerbert Xu if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr, 2582bf296b12SHerbert Xu skb->csum)) { 2583bf296b12SHerbert Xu skb->ip_summed = CHECKSUM_UNNECESSARY; 2584bf296b12SHerbert Xu break; 2585bf296b12SHerbert Xu } 2586bf296b12SHerbert Xu 2587bf296b12SHerbert Xu /* fall through */ 2588bf296b12SHerbert Xu case CHECKSUM_NONE: 2589bf296b12SHerbert Xu NAPI_GRO_CB(skb)->flush = 1; 2590bf296b12SHerbert Xu return NULL; 2591bf296b12SHerbert Xu } 2592bf296b12SHerbert Xu 2593bf296b12SHerbert Xu return tcp_gro_receive(head, skb); 2594bf296b12SHerbert Xu } 2595bf296b12SHerbert Xu 2596bf296b12SHerbert Xu int tcp4_gro_complete(struct sk_buff *skb) 2597bf296b12SHerbert Xu { 2598b71d1d42SEric Dumazet const struct iphdr *iph = ip_hdr(skb); 2599bf296b12SHerbert Xu struct tcphdr *th = tcp_hdr(skb); 2600bf296b12SHerbert Xu 2601bf296b12SHerbert Xu th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb), 2602bf296b12SHerbert Xu iph->saddr, iph->daddr, 0); 2603bf296b12SHerbert Xu skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; 2604bf296b12SHerbert Xu 2605bf296b12SHerbert Xu return tcp_gro_complete(skb); 2606bf296b12SHerbert Xu } 2607bf296b12SHerbert Xu 26081da177e4SLinus Torvalds struct proto tcp_prot = { 26091da177e4SLinus Torvalds .name = "TCP", 26101da177e4SLinus Torvalds .owner = THIS_MODULE, 26111da177e4SLinus Torvalds .close = tcp_close, 26121da177e4SLinus Torvalds .connect = tcp_v4_connect, 26131da177e4SLinus Torvalds .disconnect = tcp_disconnect, 2614463c84b9SArnaldo Carvalho de Melo .accept = inet_csk_accept, 26151da177e4SLinus Torvalds .ioctl = tcp_ioctl, 26161da177e4SLinus Torvalds .init = tcp_v4_init_sock, 26171da177e4SLinus Torvalds .destroy = tcp_v4_destroy_sock, 26181da177e4SLinus Torvalds .shutdown = tcp_shutdown, 26191da177e4SLinus Torvalds .setsockopt = tcp_setsockopt, 26201da177e4SLinus Torvalds .getsockopt = tcp_getsockopt, 26211da177e4SLinus Torvalds .recvmsg = tcp_recvmsg, 26227ba42910SChangli Gao .sendmsg = tcp_sendmsg, 26237ba42910SChangli Gao .sendpage = tcp_sendpage, 26241da177e4SLinus Torvalds .backlog_rcv = tcp_v4_do_rcv, 2625ab1e0a13SArnaldo Carvalho de Melo .hash = inet_hash, 2626ab1e0a13SArnaldo Carvalho de Melo .unhash = inet_unhash, 2627ab1e0a13SArnaldo Carvalho de Melo .get_port = inet_csk_get_port, 26281da177e4SLinus Torvalds .enter_memory_pressure = tcp_enter_memory_pressure, 26291da177e4SLinus Torvalds .sockets_allocated = &tcp_sockets_allocated, 26300a5578cfSArnaldo Carvalho de Melo .orphan_count = &tcp_orphan_count, 26311da177e4SLinus Torvalds .memory_allocated = &tcp_memory_allocated, 26321da177e4SLinus Torvalds .memory_pressure = &tcp_memory_pressure, 26331da177e4SLinus Torvalds .sysctl_wmem = sysctl_tcp_wmem, 26341da177e4SLinus Torvalds .sysctl_rmem = sysctl_tcp_rmem, 26351da177e4SLinus Torvalds .max_header = MAX_TCP_HEADER, 26361da177e4SLinus Torvalds .obj_size = sizeof(struct tcp_sock), 26373ab5aee7SEric Dumazet .slab_flags = SLAB_DESTROY_BY_RCU, 26386d6ee43eSArnaldo Carvalho de Melo .twsk_prot = &tcp_timewait_sock_ops, 263960236fddSArnaldo Carvalho de Melo .rsk_prot = &tcp_request_sock_ops, 264039d8cda7SPavel Emelyanov .h.hashinfo = &tcp_hashinfo, 26417ba42910SChangli Gao .no_autobind = true, 2642543d9cfeSArnaldo Carvalho de Melo #ifdef CONFIG_COMPAT 2643543d9cfeSArnaldo Carvalho de Melo .compat_setsockopt = compat_tcp_setsockopt, 2644543d9cfeSArnaldo Carvalho de Melo .compat_getsockopt = compat_tcp_getsockopt, 2645543d9cfeSArnaldo Carvalho de Melo #endif 2646d1a4c0b3SGlauber Costa #ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM 2647d1a4c0b3SGlauber Costa .init_cgroup = tcp_init_cgroup, 2648d1a4c0b3SGlauber Costa .destroy_cgroup = tcp_destroy_cgroup, 2649d1a4c0b3SGlauber Costa .proto_cgroup = tcp_proto_cgroup, 2650d1a4c0b3SGlauber Costa #endif 26511da177e4SLinus Torvalds }; 26524bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_prot); 26531da177e4SLinus Torvalds 2654046ee902SDenis V. Lunev static int __net_init tcp_sk_init(struct net *net) 2655046ee902SDenis V. Lunev { 2656046ee902SDenis V. Lunev return inet_ctl_sock_create(&net->ipv4.tcp_sock, 2657046ee902SDenis V. Lunev PF_INET, SOCK_RAW, IPPROTO_TCP, net); 2658046ee902SDenis V. Lunev } 2659046ee902SDenis V. Lunev 2660046ee902SDenis V. Lunev static void __net_exit tcp_sk_exit(struct net *net) 2661046ee902SDenis V. Lunev { 2662046ee902SDenis V. Lunev inet_ctl_sock_destroy(net->ipv4.tcp_sock); 2663b099ce26SEric W. Biederman } 2664b099ce26SEric W. Biederman 2665b099ce26SEric W. Biederman static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list) 2666b099ce26SEric W. Biederman { 2667b099ce26SEric W. Biederman inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET); 2668046ee902SDenis V. Lunev } 2669046ee902SDenis V. Lunev 2670046ee902SDenis V. Lunev static struct pernet_operations __net_initdata tcp_sk_ops = { 2671046ee902SDenis V. Lunev .init = tcp_sk_init, 2672046ee902SDenis V. Lunev .exit = tcp_sk_exit, 2673b099ce26SEric W. Biederman .exit_batch = tcp_sk_exit_batch, 2674046ee902SDenis V. Lunev }; 2675046ee902SDenis V. Lunev 26769b0f976fSDenis V. Lunev void __init tcp_v4_init(void) 26771da177e4SLinus Torvalds { 26785caea4eaSEric Dumazet inet_hashinfo_init(&tcp_hashinfo); 26796a1b3054SEric W. Biederman if (register_pernet_subsys(&tcp_sk_ops)) 26801da177e4SLinus Torvalds panic("Failed to create the TCP control socket.\n"); 26811da177e4SLinus Torvalds } 2682