11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * INET An implementation of the TCP/IP protocol suite for the LINUX 31da177e4SLinus Torvalds * operating system. INET is implemented using the BSD Socket 41da177e4SLinus Torvalds * interface as the means of communication with the user level. 51da177e4SLinus Torvalds * 61da177e4SLinus Torvalds * Implementation of the Transmission Control Protocol(TCP). 71da177e4SLinus Torvalds * 81da177e4SLinus Torvalds * Version: $Id: tcp_ipv4.c,v 1.240 2002/02/01 22:01:04 davem Exp $ 91da177e4SLinus Torvalds * 101da177e4SLinus Torvalds * IPv4 specific functions 111da177e4SLinus Torvalds * 121da177e4SLinus Torvalds * 131da177e4SLinus Torvalds * code split from: 141da177e4SLinus Torvalds * linux/ipv4/tcp.c 151da177e4SLinus Torvalds * linux/ipv4/tcp_input.c 161da177e4SLinus Torvalds * linux/ipv4/tcp_output.c 171da177e4SLinus Torvalds * 181da177e4SLinus Torvalds * See tcp.c for author information 191da177e4SLinus Torvalds * 201da177e4SLinus Torvalds * This program is free software; you can redistribute it and/or 211da177e4SLinus Torvalds * modify it under the terms of the GNU General Public License 221da177e4SLinus Torvalds * as published by the Free Software Foundation; either version 231da177e4SLinus Torvalds * 2 of the License, or (at your option) any later version. 241da177e4SLinus Torvalds */ 251da177e4SLinus Torvalds 261da177e4SLinus Torvalds /* 271da177e4SLinus Torvalds * Changes: 281da177e4SLinus Torvalds * David S. Miller : New socket lookup architecture. 291da177e4SLinus Torvalds * This code is dedicated to John Dyson. 301da177e4SLinus Torvalds * David S. Miller : Change semantics of established hash, 311da177e4SLinus Torvalds * half is devoted to TIME_WAIT sockets 321da177e4SLinus Torvalds * and the rest go in the other half. 
331da177e4SLinus Torvalds * Andi Kleen : Add support for syncookies and fixed 341da177e4SLinus Torvalds * some bugs: ip options weren't passed to 351da177e4SLinus Torvalds * the TCP layer, missed a check for an 361da177e4SLinus Torvalds * ACK bit. 371da177e4SLinus Torvalds * Andi Kleen : Implemented fast path mtu discovery. 381da177e4SLinus Torvalds * Fixed many serious bugs in the 3960236fddSArnaldo Carvalho de Melo * request_sock handling and moved 401da177e4SLinus Torvalds * most of it into the af independent code. 411da177e4SLinus Torvalds * Added tail drop and some other bugfixes. 42caa20d9aSStephen Hemminger * Added new listen semantics. 431da177e4SLinus Torvalds * Mike McLagan : Routing by source 441da177e4SLinus Torvalds * Juan Jose Ciarlante: ip_dynaddr bits 451da177e4SLinus Torvalds * Andi Kleen: various fixes. 461da177e4SLinus Torvalds * Vitaly E. Lavrov : Transparent proxy revived after year 471da177e4SLinus Torvalds * coma. 481da177e4SLinus Torvalds * Andi Kleen : Fix new listen. 491da177e4SLinus Torvalds * Andi Kleen : Fix accept error reporting. 501da177e4SLinus Torvalds * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which 511da177e4SLinus Torvalds * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind 521da177e4SLinus Torvalds * a single port at the same time. 
531da177e4SLinus Torvalds */ 541da177e4SLinus Torvalds 551da177e4SLinus Torvalds 561da177e4SLinus Torvalds #include <linux/types.h> 571da177e4SLinus Torvalds #include <linux/fcntl.h> 581da177e4SLinus Torvalds #include <linux/module.h> 591da177e4SLinus Torvalds #include <linux/random.h> 601da177e4SLinus Torvalds #include <linux/cache.h> 611da177e4SLinus Torvalds #include <linux/jhash.h> 621da177e4SLinus Torvalds #include <linux/init.h> 631da177e4SLinus Torvalds #include <linux/times.h> 641da177e4SLinus Torvalds 651da177e4SLinus Torvalds #include <net/icmp.h> 66304a1618SArnaldo Carvalho de Melo #include <net/inet_hashtables.h> 671da177e4SLinus Torvalds #include <net/tcp.h> 6820380731SArnaldo Carvalho de Melo #include <net/transp_v6.h> 691da177e4SLinus Torvalds #include <net/ipv6.h> 701da177e4SLinus Torvalds #include <net/inet_common.h> 716d6ee43eSArnaldo Carvalho de Melo #include <net/timewait_sock.h> 721da177e4SLinus Torvalds #include <net/xfrm.h> 731a2449a8SChris Leech #include <net/netdma.h> 741da177e4SLinus Torvalds 751da177e4SLinus Torvalds #include <linux/inet.h> 761da177e4SLinus Torvalds #include <linux/ipv6.h> 771da177e4SLinus Torvalds #include <linux/stddef.h> 781da177e4SLinus Torvalds #include <linux/proc_fs.h> 791da177e4SLinus Torvalds #include <linux/seq_file.h> 801da177e4SLinus Torvalds 811da177e4SLinus Torvalds int sysctl_tcp_tw_reuse; 821da177e4SLinus Torvalds int sysctl_tcp_low_latency; 831da177e4SLinus Torvalds 841da177e4SLinus Torvalds /* Check TCP sequence numbers in ICMP packets. 
*/ 851da177e4SLinus Torvalds #define ICMP_MIN_LENGTH 8 861da177e4SLinus Torvalds 871da177e4SLinus Torvalds /* Socket used for sending RSTs */ 881da177e4SLinus Torvalds static struct socket *tcp_socket; 891da177e4SLinus Torvalds 908292a17aSArnaldo Carvalho de Melo void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb); 911da177e4SLinus Torvalds 920f7ff927SArnaldo Carvalho de Melo struct inet_hashinfo __cacheline_aligned tcp_hashinfo = { 93e4d91918SIngo Molnar .lhash_lock = __RW_LOCK_UNLOCKED(tcp_hashinfo.lhash_lock), 940f7ff927SArnaldo Carvalho de Melo .lhash_users = ATOMIC_INIT(0), 950f7ff927SArnaldo Carvalho de Melo .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait), 961da177e4SLinus Torvalds }; 971da177e4SLinus Torvalds 98463c84b9SArnaldo Carvalho de Melo static int tcp_v4_get_port(struct sock *sk, unsigned short snum) 99463c84b9SArnaldo Carvalho de Melo { 100971af18bSArnaldo Carvalho de Melo return inet_csk_get_port(&tcp_hashinfo, sk, snum, 101971af18bSArnaldo Carvalho de Melo inet_csk_bind_conflict); 102463c84b9SArnaldo Carvalho de Melo } 103463c84b9SArnaldo Carvalho de Melo 1041da177e4SLinus Torvalds static void tcp_v4_hash(struct sock *sk) 1051da177e4SLinus Torvalds { 10681849d10SArnaldo Carvalho de Melo inet_hash(&tcp_hashinfo, sk); 1071da177e4SLinus Torvalds } 1081da177e4SLinus Torvalds 1091da177e4SLinus Torvalds void tcp_unhash(struct sock *sk) 1101da177e4SLinus Torvalds { 11181849d10SArnaldo Carvalho de Melo inet_unhash(&tcp_hashinfo, sk); 1121da177e4SLinus Torvalds } 1131da177e4SLinus Torvalds 1141da177e4SLinus Torvalds static inline __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb) 1151da177e4SLinus Torvalds { 1161da177e4SLinus Torvalds return secure_tcp_sequence_number(skb->nh.iph->daddr, 1171da177e4SLinus Torvalds skb->nh.iph->saddr, 1181da177e4SLinus Torvalds skb->h.th->dest, 1191da177e4SLinus Torvalds skb->h.th->source); 1201da177e4SLinus Torvalds } 1211da177e4SLinus Torvalds 1226d6ee43eSArnaldo 
Carvalho de Melo int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) 1236d6ee43eSArnaldo Carvalho de Melo { 1246d6ee43eSArnaldo Carvalho de Melo const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw); 1256d6ee43eSArnaldo Carvalho de Melo struct tcp_sock *tp = tcp_sk(sk); 1266d6ee43eSArnaldo Carvalho de Melo 1276d6ee43eSArnaldo Carvalho de Melo /* With PAWS, it is safe from the viewpoint 1286d6ee43eSArnaldo Carvalho de Melo of data integrity. Even without PAWS it is safe provided sequence 1296d6ee43eSArnaldo Carvalho de Melo spaces do not overlap i.e. at data rates <= 80Mbit/sec. 1306d6ee43eSArnaldo Carvalho de Melo 1316d6ee43eSArnaldo Carvalho de Melo Actually, the idea is close to VJ's one, only timestamp cache is 1326d6ee43eSArnaldo Carvalho de Melo held not per host, but per port pair and TW bucket is used as state 1336d6ee43eSArnaldo Carvalho de Melo holder. 1346d6ee43eSArnaldo Carvalho de Melo 1356d6ee43eSArnaldo Carvalho de Melo If TW bucket has been already destroyed we fall back to VJ's scheme 1366d6ee43eSArnaldo Carvalho de Melo and use initial timestamp retrieved from peer table. 
1376d6ee43eSArnaldo Carvalho de Melo */ 1386d6ee43eSArnaldo Carvalho de Melo if (tcptw->tw_ts_recent_stamp && 1396d6ee43eSArnaldo Carvalho de Melo (twp == NULL || (sysctl_tcp_tw_reuse && 1406d6ee43eSArnaldo Carvalho de Melo xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) { 1416d6ee43eSArnaldo Carvalho de Melo tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2; 1426d6ee43eSArnaldo Carvalho de Melo if (tp->write_seq == 0) 1436d6ee43eSArnaldo Carvalho de Melo tp->write_seq = 1; 1446d6ee43eSArnaldo Carvalho de Melo tp->rx_opt.ts_recent = tcptw->tw_ts_recent; 1456d6ee43eSArnaldo Carvalho de Melo tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; 1466d6ee43eSArnaldo Carvalho de Melo sock_hold(sktw); 1476d6ee43eSArnaldo Carvalho de Melo return 1; 1486d6ee43eSArnaldo Carvalho de Melo } 1496d6ee43eSArnaldo Carvalho de Melo 1506d6ee43eSArnaldo Carvalho de Melo return 0; 1516d6ee43eSArnaldo Carvalho de Melo } 1526d6ee43eSArnaldo Carvalho de Melo 1536d6ee43eSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(tcp_twsk_unique); 1546d6ee43eSArnaldo Carvalho de Melo 1551da177e4SLinus Torvalds /* This will initiate an outgoing connection. 
*/ 1561da177e4SLinus Torvalds int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) 1571da177e4SLinus Torvalds { 1581da177e4SLinus Torvalds struct inet_sock *inet = inet_sk(sk); 1591da177e4SLinus Torvalds struct tcp_sock *tp = tcp_sk(sk); 1601da177e4SLinus Torvalds struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; 1611da177e4SLinus Torvalds struct rtable *rt; 1621da177e4SLinus Torvalds u32 daddr, nexthop; 1631da177e4SLinus Torvalds int tmp; 1641da177e4SLinus Torvalds int err; 1651da177e4SLinus Torvalds 1661da177e4SLinus Torvalds if (addr_len < sizeof(struct sockaddr_in)) 1671da177e4SLinus Torvalds return -EINVAL; 1681da177e4SLinus Torvalds 1691da177e4SLinus Torvalds if (usin->sin_family != AF_INET) 1701da177e4SLinus Torvalds return -EAFNOSUPPORT; 1711da177e4SLinus Torvalds 1721da177e4SLinus Torvalds nexthop = daddr = usin->sin_addr.s_addr; 1731da177e4SLinus Torvalds if (inet->opt && inet->opt->srr) { 1741da177e4SLinus Torvalds if (!daddr) 1751da177e4SLinus Torvalds return -EINVAL; 1761da177e4SLinus Torvalds nexthop = inet->opt->faddr; 1771da177e4SLinus Torvalds } 1781da177e4SLinus Torvalds 1791da177e4SLinus Torvalds tmp = ip_route_connect(&rt, nexthop, inet->saddr, 1801da177e4SLinus Torvalds RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, 1811da177e4SLinus Torvalds IPPROTO_TCP, 1821da177e4SLinus Torvalds inet->sport, usin->sin_port, sk); 1831da177e4SLinus Torvalds if (tmp < 0) 1841da177e4SLinus Torvalds return tmp; 1851da177e4SLinus Torvalds 1861da177e4SLinus Torvalds if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) { 1871da177e4SLinus Torvalds ip_rt_put(rt); 1881da177e4SLinus Torvalds return -ENETUNREACH; 1891da177e4SLinus Torvalds } 1901da177e4SLinus Torvalds 1911da177e4SLinus Torvalds if (!inet->opt || !inet->opt->srr) 1921da177e4SLinus Torvalds daddr = rt->rt_dst; 1931da177e4SLinus Torvalds 1941da177e4SLinus Torvalds if (!inet->saddr) 1951da177e4SLinus Torvalds inet->saddr = rt->rt_src; 1961da177e4SLinus Torvalds inet->rcv_saddr = 
inet->saddr; 1971da177e4SLinus Torvalds 1981da177e4SLinus Torvalds if (tp->rx_opt.ts_recent_stamp && inet->daddr != daddr) { 1991da177e4SLinus Torvalds /* Reset inherited state */ 2001da177e4SLinus Torvalds tp->rx_opt.ts_recent = 0; 2011da177e4SLinus Torvalds tp->rx_opt.ts_recent_stamp = 0; 2021da177e4SLinus Torvalds tp->write_seq = 0; 2031da177e4SLinus Torvalds } 2041da177e4SLinus Torvalds 205295ff7edSArnaldo Carvalho de Melo if (tcp_death_row.sysctl_tw_recycle && 2061da177e4SLinus Torvalds !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) { 2071da177e4SLinus Torvalds struct inet_peer *peer = rt_get_peer(rt); 2081da177e4SLinus Torvalds 2091da177e4SLinus Torvalds /* VJ's idea. We save last timestamp seen from 2101da177e4SLinus Torvalds * the destination in peer table, when entering state TIME-WAIT 2111da177e4SLinus Torvalds * and initialize rx_opt.ts_recent from it, when trying new connection. 2121da177e4SLinus Torvalds */ 2131da177e4SLinus Torvalds 2141da177e4SLinus Torvalds if (peer && peer->tcp_ts_stamp + TCP_PAWS_MSL >= xtime.tv_sec) { 2151da177e4SLinus Torvalds tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp; 2161da177e4SLinus Torvalds tp->rx_opt.ts_recent = peer->tcp_ts; 2171da177e4SLinus Torvalds } 2181da177e4SLinus Torvalds } 2191da177e4SLinus Torvalds 2201da177e4SLinus Torvalds inet->dport = usin->sin_port; 2211da177e4SLinus Torvalds inet->daddr = daddr; 2221da177e4SLinus Torvalds 223d83d8461SArnaldo Carvalho de Melo inet_csk(sk)->icsk_ext_hdr_len = 0; 2241da177e4SLinus Torvalds if (inet->opt) 225d83d8461SArnaldo Carvalho de Melo inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen; 2261da177e4SLinus Torvalds 2271da177e4SLinus Torvalds tp->rx_opt.mss_clamp = 536; 2281da177e4SLinus Torvalds 2291da177e4SLinus Torvalds /* Socket identity is still unknown (sport may be zero). 
2301da177e4SLinus Torvalds * However we set state to SYN-SENT and not releasing socket 2311da177e4SLinus Torvalds * lock select source port, enter ourselves into the hash tables and 2321da177e4SLinus Torvalds * complete initialization after this. 2331da177e4SLinus Torvalds */ 2341da177e4SLinus Torvalds tcp_set_state(sk, TCP_SYN_SENT); 235a7f5e7f1SArnaldo Carvalho de Melo err = inet_hash_connect(&tcp_death_row, sk); 2361da177e4SLinus Torvalds if (err) 2371da177e4SLinus Torvalds goto failure; 2381da177e4SLinus Torvalds 2395d39a795SPatrick McHardy err = ip_route_newports(&rt, IPPROTO_TCP, inet->sport, inet->dport, sk); 2401da177e4SLinus Torvalds if (err) 2411da177e4SLinus Torvalds goto failure; 2421da177e4SLinus Torvalds 2431da177e4SLinus Torvalds /* OK, now commit destination to socket. */ 244bcd76111SHerbert Xu sk->sk_gso_type = SKB_GSO_TCPV4; 2456cbb0df7SArnaldo Carvalho de Melo sk_setup_caps(sk, &rt->u.dst); 2461da177e4SLinus Torvalds 2471da177e4SLinus Torvalds if (!tp->write_seq) 2481da177e4SLinus Torvalds tp->write_seq = secure_tcp_sequence_number(inet->saddr, 2491da177e4SLinus Torvalds inet->daddr, 2501da177e4SLinus Torvalds inet->sport, 2511da177e4SLinus Torvalds usin->sin_port); 2521da177e4SLinus Torvalds 2531da177e4SLinus Torvalds inet->id = tp->write_seq ^ jiffies; 2541da177e4SLinus Torvalds 2551da177e4SLinus Torvalds err = tcp_connect(sk); 2561da177e4SLinus Torvalds rt = NULL; 2571da177e4SLinus Torvalds if (err) 2581da177e4SLinus Torvalds goto failure; 2591da177e4SLinus Torvalds 2601da177e4SLinus Torvalds return 0; 2611da177e4SLinus Torvalds 2621da177e4SLinus Torvalds failure: 2631da177e4SLinus Torvalds /* This unhashes the socket and releases the local port, if necessary. 
*/ 2641da177e4SLinus Torvalds tcp_set_state(sk, TCP_CLOSE); 2651da177e4SLinus Torvalds ip_rt_put(rt); 2661da177e4SLinus Torvalds sk->sk_route_caps = 0; 2671da177e4SLinus Torvalds inet->dport = 0; 2681da177e4SLinus Torvalds return err; 2691da177e4SLinus Torvalds } 2701da177e4SLinus Torvalds 2711da177e4SLinus Torvalds /* 2721da177e4SLinus Torvalds * This routine does path mtu discovery as defined in RFC1191. 2731da177e4SLinus Torvalds */ 27440efc6faSStephen Hemminger static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu) 2751da177e4SLinus Torvalds { 2761da177e4SLinus Torvalds struct dst_entry *dst; 2771da177e4SLinus Torvalds struct inet_sock *inet = inet_sk(sk); 2781da177e4SLinus Torvalds 2791da177e4SLinus Torvalds /* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs 2801da177e4SLinus Torvalds * send out by Linux are always <576bytes so they should go through 2811da177e4SLinus Torvalds * unfragmented). 2821da177e4SLinus Torvalds */ 2831da177e4SLinus Torvalds if (sk->sk_state == TCP_LISTEN) 2841da177e4SLinus Torvalds return; 2851da177e4SLinus Torvalds 2861da177e4SLinus Torvalds /* We don't check in the destentry if pmtu discovery is forbidden 2871da177e4SLinus Torvalds * on this route. We just assume that no packet_to_big packets 2881da177e4SLinus Torvalds * are send back when pmtu discovery is not active. 2891da177e4SLinus Torvalds * There is a small race when the user changes this flag in the 2901da177e4SLinus Torvalds * route, but I think that's acceptable. 2911da177e4SLinus Torvalds */ 2921da177e4SLinus Torvalds if ((dst = __sk_dst_check(sk, 0)) == NULL) 2931da177e4SLinus Torvalds return; 2941da177e4SLinus Torvalds 2951da177e4SLinus Torvalds dst->ops->update_pmtu(dst, mtu); 2961da177e4SLinus Torvalds 2971da177e4SLinus Torvalds /* Something is about to be wrong... Remember soft error 2981da177e4SLinus Torvalds * for the case, if this connection will not able to recover. 
2991da177e4SLinus Torvalds */ 3001da177e4SLinus Torvalds if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst)) 3011da177e4SLinus Torvalds sk->sk_err_soft = EMSGSIZE; 3021da177e4SLinus Torvalds 3031da177e4SLinus Torvalds mtu = dst_mtu(dst); 3041da177e4SLinus Torvalds 3051da177e4SLinus Torvalds if (inet->pmtudisc != IP_PMTUDISC_DONT && 306d83d8461SArnaldo Carvalho de Melo inet_csk(sk)->icsk_pmtu_cookie > mtu) { 3071da177e4SLinus Torvalds tcp_sync_mss(sk, mtu); 3081da177e4SLinus Torvalds 3091da177e4SLinus Torvalds /* Resend the TCP packet because it's 3101da177e4SLinus Torvalds * clear that the old packet has been 3111da177e4SLinus Torvalds * dropped. This is the new "fast" path mtu 3121da177e4SLinus Torvalds * discovery. 3131da177e4SLinus Torvalds */ 3141da177e4SLinus Torvalds tcp_simple_retransmit(sk); 3151da177e4SLinus Torvalds } /* else let the usual retransmit timer handle it */ 3161da177e4SLinus Torvalds } 3171da177e4SLinus Torvalds 3181da177e4SLinus Torvalds /* 3191da177e4SLinus Torvalds * This routine is called by the ICMP module when it gets some 3201da177e4SLinus Torvalds * sort of error condition. If err < 0 then the socket should 3211da177e4SLinus Torvalds * be closed and the error returned to the user. If err > 0 3221da177e4SLinus Torvalds * it's just the icmp type << 8 | icmp code. After adjustment 3231da177e4SLinus Torvalds * header points to the first 8 bytes of the tcp header. We need 3241da177e4SLinus Torvalds * to find the appropriate port. 3251da177e4SLinus Torvalds * 3261da177e4SLinus Torvalds * The locking strategy used here is very "optimistic". When 3271da177e4SLinus Torvalds * someone else accesses the socket the ICMP is just dropped 3281da177e4SLinus Torvalds * and for some paths there is no check at all. 3291da177e4SLinus Torvalds * A more general error queue to queue errors for later handling 3301da177e4SLinus Torvalds * is probably better. 
3311da177e4SLinus Torvalds * 3321da177e4SLinus Torvalds */ 3331da177e4SLinus Torvalds 3341da177e4SLinus Torvalds void tcp_v4_err(struct sk_buff *skb, u32 info) 3351da177e4SLinus Torvalds { 3361da177e4SLinus Torvalds struct iphdr *iph = (struct iphdr *)skb->data; 3371da177e4SLinus Torvalds struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2)); 3381da177e4SLinus Torvalds struct tcp_sock *tp; 3391da177e4SLinus Torvalds struct inet_sock *inet; 3401da177e4SLinus Torvalds int type = skb->h.icmph->type; 3411da177e4SLinus Torvalds int code = skb->h.icmph->code; 3421da177e4SLinus Torvalds struct sock *sk; 3431da177e4SLinus Torvalds __u32 seq; 3441da177e4SLinus Torvalds int err; 3451da177e4SLinus Torvalds 3461da177e4SLinus Torvalds if (skb->len < (iph->ihl << 2) + 8) { 3471da177e4SLinus Torvalds ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); 3481da177e4SLinus Torvalds return; 3491da177e4SLinus Torvalds } 3501da177e4SLinus Torvalds 351e48c414eSArnaldo Carvalho de Melo sk = inet_lookup(&tcp_hashinfo, iph->daddr, th->dest, iph->saddr, 352463c84b9SArnaldo Carvalho de Melo th->source, inet_iif(skb)); 3531da177e4SLinus Torvalds if (!sk) { 3541da177e4SLinus Torvalds ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); 3551da177e4SLinus Torvalds return; 3561da177e4SLinus Torvalds } 3571da177e4SLinus Torvalds if (sk->sk_state == TCP_TIME_WAIT) { 3588feaf0c0SArnaldo Carvalho de Melo inet_twsk_put((struct inet_timewait_sock *)sk); 3591da177e4SLinus Torvalds return; 3601da177e4SLinus Torvalds } 3611da177e4SLinus Torvalds 3621da177e4SLinus Torvalds bh_lock_sock(sk); 3631da177e4SLinus Torvalds /* If too many ICMPs get dropped on busy 3641da177e4SLinus Torvalds * servers this needs to be solved differently. 
3651da177e4SLinus Torvalds */ 3661da177e4SLinus Torvalds if (sock_owned_by_user(sk)) 3671da177e4SLinus Torvalds NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS); 3681da177e4SLinus Torvalds 3691da177e4SLinus Torvalds if (sk->sk_state == TCP_CLOSE) 3701da177e4SLinus Torvalds goto out; 3711da177e4SLinus Torvalds 3721da177e4SLinus Torvalds tp = tcp_sk(sk); 3731da177e4SLinus Torvalds seq = ntohl(th->seq); 3741da177e4SLinus Torvalds if (sk->sk_state != TCP_LISTEN && 3751da177e4SLinus Torvalds !between(seq, tp->snd_una, tp->snd_nxt)) { 3761da177e4SLinus Torvalds NET_INC_STATS(LINUX_MIB_OUTOFWINDOWICMPS); 3771da177e4SLinus Torvalds goto out; 3781da177e4SLinus Torvalds } 3791da177e4SLinus Torvalds 3801da177e4SLinus Torvalds switch (type) { 3811da177e4SLinus Torvalds case ICMP_SOURCE_QUENCH: 3821da177e4SLinus Torvalds /* Just silently ignore these. */ 3831da177e4SLinus Torvalds goto out; 3841da177e4SLinus Torvalds case ICMP_PARAMETERPROB: 3851da177e4SLinus Torvalds err = EPROTO; 3861da177e4SLinus Torvalds break; 3871da177e4SLinus Torvalds case ICMP_DEST_UNREACH: 3881da177e4SLinus Torvalds if (code > NR_ICMP_UNREACH) 3891da177e4SLinus Torvalds goto out; 3901da177e4SLinus Torvalds 3911da177e4SLinus Torvalds if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */ 3921da177e4SLinus Torvalds if (!sock_owned_by_user(sk)) 3931da177e4SLinus Torvalds do_pmtu_discovery(sk, iph, info); 3941da177e4SLinus Torvalds goto out; 3951da177e4SLinus Torvalds } 3961da177e4SLinus Torvalds 3971da177e4SLinus Torvalds err = icmp_err_convert[code].errno; 3981da177e4SLinus Torvalds break; 3991da177e4SLinus Torvalds case ICMP_TIME_EXCEEDED: 4001da177e4SLinus Torvalds err = EHOSTUNREACH; 4011da177e4SLinus Torvalds break; 4021da177e4SLinus Torvalds default: 4031da177e4SLinus Torvalds goto out; 4041da177e4SLinus Torvalds } 4051da177e4SLinus Torvalds 4061da177e4SLinus Torvalds switch (sk->sk_state) { 40760236fddSArnaldo Carvalho de Melo struct request_sock *req, **prev; 4081da177e4SLinus Torvalds case 
TCP_LISTEN: 4091da177e4SLinus Torvalds if (sock_owned_by_user(sk)) 4101da177e4SLinus Torvalds goto out; 4111da177e4SLinus Torvalds 412463c84b9SArnaldo Carvalho de Melo req = inet_csk_search_req(sk, &prev, th->dest, 4131da177e4SLinus Torvalds iph->daddr, iph->saddr); 4141da177e4SLinus Torvalds if (!req) 4151da177e4SLinus Torvalds goto out; 4161da177e4SLinus Torvalds 4171da177e4SLinus Torvalds /* ICMPs are not backlogged, hence we cannot get 4181da177e4SLinus Torvalds an established socket here. 4191da177e4SLinus Torvalds */ 4201da177e4SLinus Torvalds BUG_TRAP(!req->sk); 4211da177e4SLinus Torvalds 4222e6599cbSArnaldo Carvalho de Melo if (seq != tcp_rsk(req)->snt_isn) { 4231da177e4SLinus Torvalds NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS); 4241da177e4SLinus Torvalds goto out; 4251da177e4SLinus Torvalds } 4261da177e4SLinus Torvalds 4271da177e4SLinus Torvalds /* 4281da177e4SLinus Torvalds * Still in SYN_RECV, just remove it silently. 4291da177e4SLinus Torvalds * There is no good way to pass the error to the newly 4301da177e4SLinus Torvalds * created socket, and POSIX does not want network 4311da177e4SLinus Torvalds * errors returned from accept(). 4321da177e4SLinus Torvalds */ 433463c84b9SArnaldo Carvalho de Melo inet_csk_reqsk_queue_drop(sk, req, prev); 4341da177e4SLinus Torvalds goto out; 4351da177e4SLinus Torvalds 4361da177e4SLinus Torvalds case TCP_SYN_SENT: 4371da177e4SLinus Torvalds case TCP_SYN_RECV: /* Cannot happen. 4381da177e4SLinus Torvalds It can f.e. if SYNs crossed. 
4391da177e4SLinus Torvalds */ 4401da177e4SLinus Torvalds if (!sock_owned_by_user(sk)) { 4411da177e4SLinus Torvalds TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS); 4421da177e4SLinus Torvalds sk->sk_err = err; 4431da177e4SLinus Torvalds 4441da177e4SLinus Torvalds sk->sk_error_report(sk); 4451da177e4SLinus Torvalds 4461da177e4SLinus Torvalds tcp_done(sk); 4471da177e4SLinus Torvalds } else { 4481da177e4SLinus Torvalds sk->sk_err_soft = err; 4491da177e4SLinus Torvalds } 4501da177e4SLinus Torvalds goto out; 4511da177e4SLinus Torvalds } 4521da177e4SLinus Torvalds 4531da177e4SLinus Torvalds /* If we've already connected we will keep trying 4541da177e4SLinus Torvalds * until we time out, or the user gives up. 4551da177e4SLinus Torvalds * 4561da177e4SLinus Torvalds * rfc1122 4.2.3.9 allows to consider as hard errors 4571da177e4SLinus Torvalds * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too, 4581da177e4SLinus Torvalds * but it is obsoleted by pmtu discovery). 4591da177e4SLinus Torvalds * 4601da177e4SLinus Torvalds * Note, that in modern internet, where routing is unreliable 4611da177e4SLinus Torvalds * and in each dark corner broken firewalls sit, sending random 4621da177e4SLinus Torvalds * errors ordered by their masters even this two messages finally lose 4631da177e4SLinus Torvalds * their original sense (even Linux sends invalid PORT_UNREACHs) 4641da177e4SLinus Torvalds * 4651da177e4SLinus Torvalds * Now we are in compliance with RFCs. 
4661da177e4SLinus Torvalds * --ANK (980905) 4671da177e4SLinus Torvalds */ 4681da177e4SLinus Torvalds 4691da177e4SLinus Torvalds inet = inet_sk(sk); 4701da177e4SLinus Torvalds if (!sock_owned_by_user(sk) && inet->recverr) { 4711da177e4SLinus Torvalds sk->sk_err = err; 4721da177e4SLinus Torvalds sk->sk_error_report(sk); 4731da177e4SLinus Torvalds } else { /* Only an error on timeout */ 4741da177e4SLinus Torvalds sk->sk_err_soft = err; 4751da177e4SLinus Torvalds } 4761da177e4SLinus Torvalds 4771da177e4SLinus Torvalds out: 4781da177e4SLinus Torvalds bh_unlock_sock(sk); 4791da177e4SLinus Torvalds sock_put(sk); 4801da177e4SLinus Torvalds } 4811da177e4SLinus Torvalds 4821da177e4SLinus Torvalds /* This routine computes an IPv4 TCP checksum. */ 4838292a17aSArnaldo Carvalho de Melo void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb) 4841da177e4SLinus Torvalds { 4851da177e4SLinus Torvalds struct inet_sock *inet = inet_sk(sk); 4868292a17aSArnaldo Carvalho de Melo struct tcphdr *th = skb->h.th; 4871da177e4SLinus Torvalds 4881da177e4SLinus Torvalds if (skb->ip_summed == CHECKSUM_HW) { 4891da177e4SLinus Torvalds th->check = ~tcp_v4_check(th, len, inet->saddr, inet->daddr, 0); 4901da177e4SLinus Torvalds skb->csum = offsetof(struct tcphdr, check); 4911da177e4SLinus Torvalds } else { 4921da177e4SLinus Torvalds th->check = tcp_v4_check(th, len, inet->saddr, inet->daddr, 4931da177e4SLinus Torvalds csum_partial((char *)th, 4941da177e4SLinus Torvalds th->doff << 2, 4951da177e4SLinus Torvalds skb->csum)); 4961da177e4SLinus Torvalds } 4971da177e4SLinus Torvalds } 4981da177e4SLinus Torvalds 499*a430a43dSHerbert Xu int tcp_v4_gso_send_check(struct sk_buff *skb) 500*a430a43dSHerbert Xu { 501*a430a43dSHerbert Xu struct iphdr *iph; 502*a430a43dSHerbert Xu struct tcphdr *th; 503*a430a43dSHerbert Xu 504*a430a43dSHerbert Xu if (!pskb_may_pull(skb, sizeof(*th))) 505*a430a43dSHerbert Xu return -EINVAL; 506*a430a43dSHerbert Xu 507*a430a43dSHerbert Xu iph = skb->nh.iph; 
508*a430a43dSHerbert Xu th = skb->h.th; 509*a430a43dSHerbert Xu 510*a430a43dSHerbert Xu th->check = 0; 511*a430a43dSHerbert Xu th->check = ~tcp_v4_check(th, skb->len, iph->saddr, iph->daddr, 0); 512*a430a43dSHerbert Xu skb->csum = offsetof(struct tcphdr, check); 513*a430a43dSHerbert Xu skb->ip_summed = CHECKSUM_HW; 514*a430a43dSHerbert Xu return 0; 515*a430a43dSHerbert Xu } 516*a430a43dSHerbert Xu 5171da177e4SLinus Torvalds /* 5181da177e4SLinus Torvalds * This routine will send an RST to the other tcp. 5191da177e4SLinus Torvalds * 5201da177e4SLinus Torvalds * Someone asks: why I NEVER use socket parameters (TOS, TTL etc.) 5211da177e4SLinus Torvalds * for reset. 5221da177e4SLinus Torvalds * Answer: if a packet caused RST, it is not for a socket 5231da177e4SLinus Torvalds * existing in our system, if it is matched to a socket, 5241da177e4SLinus Torvalds * it is just duplicate segment or bug in other side's TCP. 5251da177e4SLinus Torvalds * So that we build reply only basing on parameters 5261da177e4SLinus Torvalds * arrived with segment. 5271da177e4SLinus Torvalds * Exception: precedence violation. We do not implement it in any case. 5281da177e4SLinus Torvalds */ 5291da177e4SLinus Torvalds 5301da177e4SLinus Torvalds static void tcp_v4_send_reset(struct sk_buff *skb) 5311da177e4SLinus Torvalds { 5321da177e4SLinus Torvalds struct tcphdr *th = skb->h.th; 5331da177e4SLinus Torvalds struct tcphdr rth; 5341da177e4SLinus Torvalds struct ip_reply_arg arg; 5351da177e4SLinus Torvalds 5361da177e4SLinus Torvalds /* Never send a reset in response to a reset. */ 5371da177e4SLinus Torvalds if (th->rst) 5381da177e4SLinus Torvalds return; 5391da177e4SLinus Torvalds 5401da177e4SLinus Torvalds if (((struct rtable *)skb->dst)->rt_type != RTN_LOCAL) 5411da177e4SLinus Torvalds return; 5421da177e4SLinus Torvalds 5431da177e4SLinus Torvalds /* Swap the send and the receive. 
*/ 5441da177e4SLinus Torvalds memset(&rth, 0, sizeof(struct tcphdr)); 5451da177e4SLinus Torvalds rth.dest = th->source; 5461da177e4SLinus Torvalds rth.source = th->dest; 5471da177e4SLinus Torvalds rth.doff = sizeof(struct tcphdr) / 4; 5481da177e4SLinus Torvalds rth.rst = 1; 5491da177e4SLinus Torvalds 5501da177e4SLinus Torvalds if (th->ack) { 5511da177e4SLinus Torvalds rth.seq = th->ack_seq; 5521da177e4SLinus Torvalds } else { 5531da177e4SLinus Torvalds rth.ack = 1; 5541da177e4SLinus Torvalds rth.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin + 5551da177e4SLinus Torvalds skb->len - (th->doff << 2)); 5561da177e4SLinus Torvalds } 5571da177e4SLinus Torvalds 5581da177e4SLinus Torvalds memset(&arg, 0, sizeof arg); 5591da177e4SLinus Torvalds arg.iov[0].iov_base = (unsigned char *)&rth; 5601da177e4SLinus Torvalds arg.iov[0].iov_len = sizeof rth; 5611da177e4SLinus Torvalds arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr, 5621da177e4SLinus Torvalds skb->nh.iph->saddr, /*XXX*/ 5631da177e4SLinus Torvalds sizeof(struct tcphdr), IPPROTO_TCP, 0); 5641da177e4SLinus Torvalds arg.csumoffset = offsetof(struct tcphdr, check) / 2; 5651da177e4SLinus Torvalds 5661da177e4SLinus Torvalds ip_send_reply(tcp_socket->sk, skb, &arg, sizeof rth); 5671da177e4SLinus Torvalds 5681da177e4SLinus Torvalds TCP_INC_STATS_BH(TCP_MIB_OUTSEGS); 5691da177e4SLinus Torvalds TCP_INC_STATS_BH(TCP_MIB_OUTRSTS); 5701da177e4SLinus Torvalds } 5711da177e4SLinus Torvalds 5721da177e4SLinus Torvalds /* The code following below sending ACKs in SYN-RECV and TIME-WAIT states 5731da177e4SLinus Torvalds outside socket context is ugly, certainly. What can I do? 
5741da177e4SLinus Torvalds */ 5751da177e4SLinus Torvalds 5761da177e4SLinus Torvalds static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, 5771da177e4SLinus Torvalds u32 win, u32 ts) 5781da177e4SLinus Torvalds { 5791da177e4SLinus Torvalds struct tcphdr *th = skb->h.th; 5801da177e4SLinus Torvalds struct { 5811da177e4SLinus Torvalds struct tcphdr th; 5821da177e4SLinus Torvalds u32 tsopt[3]; 5831da177e4SLinus Torvalds } rep; 5841da177e4SLinus Torvalds struct ip_reply_arg arg; 5851da177e4SLinus Torvalds 5861da177e4SLinus Torvalds memset(&rep.th, 0, sizeof(struct tcphdr)); 5871da177e4SLinus Torvalds memset(&arg, 0, sizeof arg); 5881da177e4SLinus Torvalds 5891da177e4SLinus Torvalds arg.iov[0].iov_base = (unsigned char *)&rep; 5901da177e4SLinus Torvalds arg.iov[0].iov_len = sizeof(rep.th); 5911da177e4SLinus Torvalds if (ts) { 5921da177e4SLinus Torvalds rep.tsopt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | 5931da177e4SLinus Torvalds (TCPOPT_TIMESTAMP << 8) | 5941da177e4SLinus Torvalds TCPOLEN_TIMESTAMP); 5951da177e4SLinus Torvalds rep.tsopt[1] = htonl(tcp_time_stamp); 5961da177e4SLinus Torvalds rep.tsopt[2] = htonl(ts); 5971da177e4SLinus Torvalds arg.iov[0].iov_len = sizeof(rep); 5981da177e4SLinus Torvalds } 5991da177e4SLinus Torvalds 6001da177e4SLinus Torvalds /* Swap the send and the receive. 
*/ 6011da177e4SLinus Torvalds rep.th.dest = th->source; 6021da177e4SLinus Torvalds rep.th.source = th->dest; 6031da177e4SLinus Torvalds rep.th.doff = arg.iov[0].iov_len / 4; 6041da177e4SLinus Torvalds rep.th.seq = htonl(seq); 6051da177e4SLinus Torvalds rep.th.ack_seq = htonl(ack); 6061da177e4SLinus Torvalds rep.th.ack = 1; 6071da177e4SLinus Torvalds rep.th.window = htons(win); 6081da177e4SLinus Torvalds 6091da177e4SLinus Torvalds arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr, 6101da177e4SLinus Torvalds skb->nh.iph->saddr, /*XXX*/ 6111da177e4SLinus Torvalds arg.iov[0].iov_len, IPPROTO_TCP, 0); 6121da177e4SLinus Torvalds arg.csumoffset = offsetof(struct tcphdr, check) / 2; 6131da177e4SLinus Torvalds 6141da177e4SLinus Torvalds ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len); 6151da177e4SLinus Torvalds 6161da177e4SLinus Torvalds TCP_INC_STATS_BH(TCP_MIB_OUTSEGS); 6171da177e4SLinus Torvalds } 6181da177e4SLinus Torvalds 6191da177e4SLinus Torvalds static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) 6201da177e4SLinus Torvalds { 6218feaf0c0SArnaldo Carvalho de Melo struct inet_timewait_sock *tw = inet_twsk(sk); 6228feaf0c0SArnaldo Carvalho de Melo const struct tcp_timewait_sock *tcptw = tcp_twsk(sk); 6231da177e4SLinus Torvalds 6248feaf0c0SArnaldo Carvalho de Melo tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, 6258feaf0c0SArnaldo Carvalho de Melo tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcptw->tw_ts_recent); 6261da177e4SLinus Torvalds 6278feaf0c0SArnaldo Carvalho de Melo inet_twsk_put(tw); 6281da177e4SLinus Torvalds } 6291da177e4SLinus Torvalds 63060236fddSArnaldo Carvalho de Melo static void tcp_v4_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req) 6311da177e4SLinus Torvalds { 6322e6599cbSArnaldo Carvalho de Melo tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, 6331da177e4SLinus Torvalds req->ts_recent); 6341da177e4SLinus Torvalds } 6351da177e4SLinus Torvalds 6361da177e4SLinus 
Torvalds /* 6371da177e4SLinus Torvalds * Send a SYN-ACK after having received an ACK. 63860236fddSArnaldo Carvalho de Melo * This still operates on a request_sock only, not on a big 6391da177e4SLinus Torvalds * socket. 6401da177e4SLinus Torvalds */ 64160236fddSArnaldo Carvalho de Melo static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req, 6421da177e4SLinus Torvalds struct dst_entry *dst) 6431da177e4SLinus Torvalds { 6442e6599cbSArnaldo Carvalho de Melo const struct inet_request_sock *ireq = inet_rsk(req); 6451da177e4SLinus Torvalds int err = -1; 6461da177e4SLinus Torvalds struct sk_buff * skb; 6471da177e4SLinus Torvalds 6481da177e4SLinus Torvalds /* First, grab a route. */ 649463c84b9SArnaldo Carvalho de Melo if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL) 6501da177e4SLinus Torvalds goto out; 6511da177e4SLinus Torvalds 6521da177e4SLinus Torvalds skb = tcp_make_synack(sk, dst, req); 6531da177e4SLinus Torvalds 6541da177e4SLinus Torvalds if (skb) { 6551da177e4SLinus Torvalds struct tcphdr *th = skb->h.th; 6561da177e4SLinus Torvalds 6571da177e4SLinus Torvalds th->check = tcp_v4_check(th, skb->len, 6582e6599cbSArnaldo Carvalho de Melo ireq->loc_addr, 6592e6599cbSArnaldo Carvalho de Melo ireq->rmt_addr, 6601da177e4SLinus Torvalds csum_partial((char *)th, skb->len, 6611da177e4SLinus Torvalds skb->csum)); 6621da177e4SLinus Torvalds 6632e6599cbSArnaldo Carvalho de Melo err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr, 6642e6599cbSArnaldo Carvalho de Melo ireq->rmt_addr, 6652e6599cbSArnaldo Carvalho de Melo ireq->opt); 6661da177e4SLinus Torvalds if (err == NET_XMIT_CN) 6671da177e4SLinus Torvalds err = 0; 6681da177e4SLinus Torvalds } 6691da177e4SLinus Torvalds 6701da177e4SLinus Torvalds out: 6711da177e4SLinus Torvalds dst_release(dst); 6721da177e4SLinus Torvalds return err; 6731da177e4SLinus Torvalds } 6741da177e4SLinus Torvalds 6751da177e4SLinus Torvalds /* 67660236fddSArnaldo Carvalho de Melo * IPv4 request_sock destructor. 
6771da177e4SLinus Torvalds */ 67860236fddSArnaldo Carvalho de Melo static void tcp_v4_reqsk_destructor(struct request_sock *req) 6791da177e4SLinus Torvalds { 6802e6599cbSArnaldo Carvalho de Melo kfree(inet_rsk(req)->opt); 6811da177e4SLinus Torvalds } 6821da177e4SLinus Torvalds 68380e40daaSArnaldo Carvalho de Melo #ifdef CONFIG_SYN_COOKIES 68440efc6faSStephen Hemminger static void syn_flood_warning(struct sk_buff *skb) 6851da177e4SLinus Torvalds { 6861da177e4SLinus Torvalds static unsigned long warntime; 6871da177e4SLinus Torvalds 6881da177e4SLinus Torvalds if (time_after(jiffies, (warntime + HZ * 60))) { 6891da177e4SLinus Torvalds warntime = jiffies; 6901da177e4SLinus Torvalds printk(KERN_INFO 6911da177e4SLinus Torvalds "possible SYN flooding on port %d. Sending cookies.\n", 6921da177e4SLinus Torvalds ntohs(skb->h.th->dest)); 6931da177e4SLinus Torvalds } 6941da177e4SLinus Torvalds } 69580e40daaSArnaldo Carvalho de Melo #endif 6961da177e4SLinus Torvalds 6971da177e4SLinus Torvalds /* 69860236fddSArnaldo Carvalho de Melo * Save and compile IPv4 options into the request_sock if needed. 
6991da177e4SLinus Torvalds */ 70040efc6faSStephen Hemminger static struct ip_options *tcp_v4_save_options(struct sock *sk, 7011da177e4SLinus Torvalds struct sk_buff *skb) 7021da177e4SLinus Torvalds { 7031da177e4SLinus Torvalds struct ip_options *opt = &(IPCB(skb)->opt); 7041da177e4SLinus Torvalds struct ip_options *dopt = NULL; 7051da177e4SLinus Torvalds 7061da177e4SLinus Torvalds if (opt && opt->optlen) { 7071da177e4SLinus Torvalds int opt_size = optlength(opt); 7081da177e4SLinus Torvalds dopt = kmalloc(opt_size, GFP_ATOMIC); 7091da177e4SLinus Torvalds if (dopt) { 7101da177e4SLinus Torvalds if (ip_options_echo(dopt, skb)) { 7111da177e4SLinus Torvalds kfree(dopt); 7121da177e4SLinus Torvalds dopt = NULL; 7131da177e4SLinus Torvalds } 7141da177e4SLinus Torvalds } 7151da177e4SLinus Torvalds } 7161da177e4SLinus Torvalds return dopt; 7171da177e4SLinus Torvalds } 7181da177e4SLinus Torvalds 71960236fddSArnaldo Carvalho de Melo struct request_sock_ops tcp_request_sock_ops = { 7201da177e4SLinus Torvalds .family = PF_INET, 7212e6599cbSArnaldo Carvalho de Melo .obj_size = sizeof(struct tcp_request_sock), 7221da177e4SLinus Torvalds .rtx_syn_ack = tcp_v4_send_synack, 72360236fddSArnaldo Carvalho de Melo .send_ack = tcp_v4_reqsk_send_ack, 72460236fddSArnaldo Carvalho de Melo .destructor = tcp_v4_reqsk_destructor, 7251da177e4SLinus Torvalds .send_reset = tcp_v4_send_reset, 7261da177e4SLinus Torvalds }; 7271da177e4SLinus Torvalds 7286d6ee43eSArnaldo Carvalho de Melo static struct timewait_sock_ops tcp_timewait_sock_ops = { 7296d6ee43eSArnaldo Carvalho de Melo .twsk_obj_size = sizeof(struct tcp_timewait_sock), 7306d6ee43eSArnaldo Carvalho de Melo .twsk_unique = tcp_twsk_unique, 7316d6ee43eSArnaldo Carvalho de Melo }; 7326d6ee43eSArnaldo Carvalho de Melo 7331da177e4SLinus Torvalds int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) 7341da177e4SLinus Torvalds { 7352e6599cbSArnaldo Carvalho de Melo struct inet_request_sock *ireq; 7361da177e4SLinus Torvalds struct 
tcp_options_received tmp_opt; 73760236fddSArnaldo Carvalho de Melo struct request_sock *req; 7381da177e4SLinus Torvalds __u32 saddr = skb->nh.iph->saddr; 7391da177e4SLinus Torvalds __u32 daddr = skb->nh.iph->daddr; 7401da177e4SLinus Torvalds __u32 isn = TCP_SKB_CB(skb)->when; 7411da177e4SLinus Torvalds struct dst_entry *dst = NULL; 7421da177e4SLinus Torvalds #ifdef CONFIG_SYN_COOKIES 7431da177e4SLinus Torvalds int want_cookie = 0; 7441da177e4SLinus Torvalds #else 7451da177e4SLinus Torvalds #define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */ 7461da177e4SLinus Torvalds #endif 7471da177e4SLinus Torvalds 7481da177e4SLinus Torvalds /* Never answer to SYNs send to broadcast or multicast */ 7491da177e4SLinus Torvalds if (((struct rtable *)skb->dst)->rt_flags & 7501da177e4SLinus Torvalds (RTCF_BROADCAST | RTCF_MULTICAST)) 7511da177e4SLinus Torvalds goto drop; 7521da177e4SLinus Torvalds 7531da177e4SLinus Torvalds /* TW buckets are converted to open requests without 7541da177e4SLinus Torvalds * limitations, they conserve resources and peer is 7551da177e4SLinus Torvalds * evidently real one. 7561da177e4SLinus Torvalds */ 757463c84b9SArnaldo Carvalho de Melo if (inet_csk_reqsk_queue_is_full(sk) && !isn) { 7581da177e4SLinus Torvalds #ifdef CONFIG_SYN_COOKIES 7591da177e4SLinus Torvalds if (sysctl_tcp_syncookies) { 7601da177e4SLinus Torvalds want_cookie = 1; 7611da177e4SLinus Torvalds } else 7621da177e4SLinus Torvalds #endif 7631da177e4SLinus Torvalds goto drop; 7641da177e4SLinus Torvalds } 7651da177e4SLinus Torvalds 7661da177e4SLinus Torvalds /* Accept backlog is full. If we have already queued enough 7671da177e4SLinus Torvalds * of warm entries in syn queue, drop request. It is better than 7681da177e4SLinus Torvalds * clogging syn queue with openreqs with exponentially increasing 7691da177e4SLinus Torvalds * timeout. 
7701da177e4SLinus Torvalds */ 771463c84b9SArnaldo Carvalho de Melo if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) 7721da177e4SLinus Torvalds goto drop; 7731da177e4SLinus Torvalds 77460236fddSArnaldo Carvalho de Melo req = reqsk_alloc(&tcp_request_sock_ops); 7751da177e4SLinus Torvalds if (!req) 7761da177e4SLinus Torvalds goto drop; 7771da177e4SLinus Torvalds 7781da177e4SLinus Torvalds tcp_clear_options(&tmp_opt); 7791da177e4SLinus Torvalds tmp_opt.mss_clamp = 536; 7801da177e4SLinus Torvalds tmp_opt.user_mss = tcp_sk(sk)->rx_opt.user_mss; 7811da177e4SLinus Torvalds 7821da177e4SLinus Torvalds tcp_parse_options(skb, &tmp_opt, 0); 7831da177e4SLinus Torvalds 7841da177e4SLinus Torvalds if (want_cookie) { 7851da177e4SLinus Torvalds tcp_clear_options(&tmp_opt); 7861da177e4SLinus Torvalds tmp_opt.saw_tstamp = 0; 7871da177e4SLinus Torvalds } 7881da177e4SLinus Torvalds 7891da177e4SLinus Torvalds if (tmp_opt.saw_tstamp && !tmp_opt.rcv_tsval) { 7901da177e4SLinus Torvalds /* Some OSes (unknown ones, but I see them on web server, which 7911da177e4SLinus Torvalds * contains information interesting only for windows' 7921da177e4SLinus Torvalds * users) do not send their stamp in SYN. It is easy case. 7931da177e4SLinus Torvalds * We simply do not advertise TS support. 
7941da177e4SLinus Torvalds */ 7951da177e4SLinus Torvalds tmp_opt.saw_tstamp = 0; 7961da177e4SLinus Torvalds tmp_opt.tstamp_ok = 0; 7971da177e4SLinus Torvalds } 7981da177e4SLinus Torvalds tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; 7991da177e4SLinus Torvalds 8001da177e4SLinus Torvalds tcp_openreq_init(req, &tmp_opt, skb); 8011da177e4SLinus Torvalds 8022e6599cbSArnaldo Carvalho de Melo ireq = inet_rsk(req); 8032e6599cbSArnaldo Carvalho de Melo ireq->loc_addr = daddr; 8042e6599cbSArnaldo Carvalho de Melo ireq->rmt_addr = saddr; 8052e6599cbSArnaldo Carvalho de Melo ireq->opt = tcp_v4_save_options(sk, skb); 8061da177e4SLinus Torvalds if (!want_cookie) 8071da177e4SLinus Torvalds TCP_ECN_create_request(req, skb->h.th); 8081da177e4SLinus Torvalds 8091da177e4SLinus Torvalds if (want_cookie) { 8101da177e4SLinus Torvalds #ifdef CONFIG_SYN_COOKIES 8111da177e4SLinus Torvalds syn_flood_warning(skb); 8121da177e4SLinus Torvalds #endif 8131da177e4SLinus Torvalds isn = cookie_v4_init_sequence(sk, skb, &req->mss); 8141da177e4SLinus Torvalds } else if (!isn) { 8151da177e4SLinus Torvalds struct inet_peer *peer = NULL; 8161da177e4SLinus Torvalds 8171da177e4SLinus Torvalds /* VJ's idea. We save last timestamp seen 8181da177e4SLinus Torvalds * from the destination in peer table, when entering 8191da177e4SLinus Torvalds * state TIME-WAIT, and check against it before 8201da177e4SLinus Torvalds * accepting new connection request. 8211da177e4SLinus Torvalds * 8221da177e4SLinus Torvalds * If "isn" is not zero, this request hit alive 8231da177e4SLinus Torvalds * timewait bucket, so that all the necessary checks 8241da177e4SLinus Torvalds * are made in the function processing timewait state. 
8251da177e4SLinus Torvalds */ 8261da177e4SLinus Torvalds if (tmp_opt.saw_tstamp && 827295ff7edSArnaldo Carvalho de Melo tcp_death_row.sysctl_tw_recycle && 828463c84b9SArnaldo Carvalho de Melo (dst = inet_csk_route_req(sk, req)) != NULL && 8291da177e4SLinus Torvalds (peer = rt_get_peer((struct rtable *)dst)) != NULL && 8301da177e4SLinus Torvalds peer->v4daddr == saddr) { 8311da177e4SLinus Torvalds if (xtime.tv_sec < peer->tcp_ts_stamp + TCP_PAWS_MSL && 8321da177e4SLinus Torvalds (s32)(peer->tcp_ts - req->ts_recent) > 8331da177e4SLinus Torvalds TCP_PAWS_WINDOW) { 8341da177e4SLinus Torvalds NET_INC_STATS_BH(LINUX_MIB_PAWSPASSIVEREJECTED); 8351da177e4SLinus Torvalds dst_release(dst); 8361da177e4SLinus Torvalds goto drop_and_free; 8371da177e4SLinus Torvalds } 8381da177e4SLinus Torvalds } 8391da177e4SLinus Torvalds /* Kill the following clause, if you dislike this way. */ 8401da177e4SLinus Torvalds else if (!sysctl_tcp_syncookies && 841463c84b9SArnaldo Carvalho de Melo (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < 8421da177e4SLinus Torvalds (sysctl_max_syn_backlog >> 2)) && 8431da177e4SLinus Torvalds (!peer || !peer->tcp_ts_stamp) && 8441da177e4SLinus Torvalds (!dst || !dst_metric(dst, RTAX_RTT))) { 8451da177e4SLinus Torvalds /* Without syncookies last quarter of 8461da177e4SLinus Torvalds * backlog is filled with destinations, 8471da177e4SLinus Torvalds * proven to be alive. 8481da177e4SLinus Torvalds * It means that we continue to communicate 8491da177e4SLinus Torvalds * to destinations, already remembered 8501da177e4SLinus Torvalds * to the moment of synflood. 
8511da177e4SLinus Torvalds */ 85264ce2073SPatrick McHardy LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open " 85364ce2073SPatrick McHardy "request from %u.%u.%u.%u/%u\n", 8541da177e4SLinus Torvalds NIPQUAD(saddr), 85564ce2073SPatrick McHardy ntohs(skb->h.th->source)); 8561da177e4SLinus Torvalds dst_release(dst); 8571da177e4SLinus Torvalds goto drop_and_free; 8581da177e4SLinus Torvalds } 8591da177e4SLinus Torvalds 8601da177e4SLinus Torvalds isn = tcp_v4_init_sequence(sk, skb); 8611da177e4SLinus Torvalds } 8622e6599cbSArnaldo Carvalho de Melo tcp_rsk(req)->snt_isn = isn; 8631da177e4SLinus Torvalds 8641da177e4SLinus Torvalds if (tcp_v4_send_synack(sk, req, dst)) 8651da177e4SLinus Torvalds goto drop_and_free; 8661da177e4SLinus Torvalds 8671da177e4SLinus Torvalds if (want_cookie) { 86860236fddSArnaldo Carvalho de Melo reqsk_free(req); 8691da177e4SLinus Torvalds } else { 8703f421baaSArnaldo Carvalho de Melo inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); 8711da177e4SLinus Torvalds } 8721da177e4SLinus Torvalds return 0; 8731da177e4SLinus Torvalds 8741da177e4SLinus Torvalds drop_and_free: 87560236fddSArnaldo Carvalho de Melo reqsk_free(req); 8761da177e4SLinus Torvalds drop: 8771da177e4SLinus Torvalds TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS); 8781da177e4SLinus Torvalds return 0; 8791da177e4SLinus Torvalds } 8801da177e4SLinus Torvalds 8811da177e4SLinus Torvalds 8821da177e4SLinus Torvalds /* 8831da177e4SLinus Torvalds * The three way handshake has completed - we got a valid synack - 8841da177e4SLinus Torvalds * now create the new socket. 
8851da177e4SLinus Torvalds */ 8861da177e4SLinus Torvalds struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, 88760236fddSArnaldo Carvalho de Melo struct request_sock *req, 8881da177e4SLinus Torvalds struct dst_entry *dst) 8891da177e4SLinus Torvalds { 8902e6599cbSArnaldo Carvalho de Melo struct inet_request_sock *ireq; 8911da177e4SLinus Torvalds struct inet_sock *newinet; 8921da177e4SLinus Torvalds struct tcp_sock *newtp; 8931da177e4SLinus Torvalds struct sock *newsk; 8941da177e4SLinus Torvalds 8951da177e4SLinus Torvalds if (sk_acceptq_is_full(sk)) 8961da177e4SLinus Torvalds goto exit_overflow; 8971da177e4SLinus Torvalds 898463c84b9SArnaldo Carvalho de Melo if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL) 8991da177e4SLinus Torvalds goto exit; 9001da177e4SLinus Torvalds 9011da177e4SLinus Torvalds newsk = tcp_create_openreq_child(sk, req, skb); 9021da177e4SLinus Torvalds if (!newsk) 9031da177e4SLinus Torvalds goto exit; 9041da177e4SLinus Torvalds 905bcd76111SHerbert Xu newsk->sk_gso_type = SKB_GSO_TCPV4; 9066cbb0df7SArnaldo Carvalho de Melo sk_setup_caps(newsk, dst); 9071da177e4SLinus Torvalds 9081da177e4SLinus Torvalds newtp = tcp_sk(newsk); 9091da177e4SLinus Torvalds newinet = inet_sk(newsk); 9102e6599cbSArnaldo Carvalho de Melo ireq = inet_rsk(req); 9112e6599cbSArnaldo Carvalho de Melo newinet->daddr = ireq->rmt_addr; 9122e6599cbSArnaldo Carvalho de Melo newinet->rcv_saddr = ireq->loc_addr; 9132e6599cbSArnaldo Carvalho de Melo newinet->saddr = ireq->loc_addr; 9142e6599cbSArnaldo Carvalho de Melo newinet->opt = ireq->opt; 9152e6599cbSArnaldo Carvalho de Melo ireq->opt = NULL; 916463c84b9SArnaldo Carvalho de Melo newinet->mc_index = inet_iif(skb); 9171da177e4SLinus Torvalds newinet->mc_ttl = skb->nh.iph->ttl; 918d83d8461SArnaldo Carvalho de Melo inet_csk(newsk)->icsk_ext_hdr_len = 0; 9191da177e4SLinus Torvalds if (newinet->opt) 920d83d8461SArnaldo Carvalho de Melo inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen; 9211da177e4SLinus 
Torvalds newinet->id = newtp->write_seq ^ jiffies; 9221da177e4SLinus Torvalds 9235d424d5aSJohn Heffner tcp_mtup_init(newsk); 9241da177e4SLinus Torvalds tcp_sync_mss(newsk, dst_mtu(dst)); 9251da177e4SLinus Torvalds newtp->advmss = dst_metric(dst, RTAX_ADVMSS); 9261da177e4SLinus Torvalds tcp_initialize_rcv_mss(newsk); 9271da177e4SLinus Torvalds 928f3f05f70SArnaldo Carvalho de Melo __inet_hash(&tcp_hashinfo, newsk, 0); 9292d8c4ce5SArnaldo Carvalho de Melo __inet_inherit_port(&tcp_hashinfo, sk, newsk); 9301da177e4SLinus Torvalds 9311da177e4SLinus Torvalds return newsk; 9321da177e4SLinus Torvalds 9331da177e4SLinus Torvalds exit_overflow: 9341da177e4SLinus Torvalds NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS); 9351da177e4SLinus Torvalds exit: 9361da177e4SLinus Torvalds NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS); 9371da177e4SLinus Torvalds dst_release(dst); 9381da177e4SLinus Torvalds return NULL; 9391da177e4SLinus Torvalds } 9401da177e4SLinus Torvalds 9411da177e4SLinus Torvalds static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) 9421da177e4SLinus Torvalds { 9431da177e4SLinus Torvalds struct tcphdr *th = skb->h.th; 9441da177e4SLinus Torvalds struct iphdr *iph = skb->nh.iph; 9451da177e4SLinus Torvalds struct sock *nsk; 94660236fddSArnaldo Carvalho de Melo struct request_sock **prev; 9471da177e4SLinus Torvalds /* Find possible connection requests. 
*/ 948463c84b9SArnaldo Carvalho de Melo struct request_sock *req = inet_csk_search_req(sk, &prev, th->source, 9491da177e4SLinus Torvalds iph->saddr, iph->daddr); 9501da177e4SLinus Torvalds if (req) 9511da177e4SLinus Torvalds return tcp_check_req(sk, skb, req, prev); 9521da177e4SLinus Torvalds 953e48c414eSArnaldo Carvalho de Melo nsk = __inet_lookup_established(&tcp_hashinfo, skb->nh.iph->saddr, 954e48c414eSArnaldo Carvalho de Melo th->source, skb->nh.iph->daddr, 955463c84b9SArnaldo Carvalho de Melo ntohs(th->dest), inet_iif(skb)); 9561da177e4SLinus Torvalds 9571da177e4SLinus Torvalds if (nsk) { 9581da177e4SLinus Torvalds if (nsk->sk_state != TCP_TIME_WAIT) { 9591da177e4SLinus Torvalds bh_lock_sock(nsk); 9601da177e4SLinus Torvalds return nsk; 9611da177e4SLinus Torvalds } 9628feaf0c0SArnaldo Carvalho de Melo inet_twsk_put((struct inet_timewait_sock *)nsk); 9631da177e4SLinus Torvalds return NULL; 9641da177e4SLinus Torvalds } 9651da177e4SLinus Torvalds 9661da177e4SLinus Torvalds #ifdef CONFIG_SYN_COOKIES 9671da177e4SLinus Torvalds if (!th->rst && !th->syn && th->ack) 9681da177e4SLinus Torvalds sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt)); 9691da177e4SLinus Torvalds #endif 9701da177e4SLinus Torvalds return sk; 9711da177e4SLinus Torvalds } 9721da177e4SLinus Torvalds 9731da177e4SLinus Torvalds static int tcp_v4_checksum_init(struct sk_buff *skb) 9741da177e4SLinus Torvalds { 9751da177e4SLinus Torvalds if (skb->ip_summed == CHECKSUM_HW) { 9761da177e4SLinus Torvalds if (!tcp_v4_check(skb->h.th, skb->len, skb->nh.iph->saddr, 977fb286bb2SHerbert Xu skb->nh.iph->daddr, skb->csum)) { 9781da177e4SLinus Torvalds skb->ip_summed = CHECKSUM_UNNECESSARY; 979fb286bb2SHerbert Xu return 0; 980fb286bb2SHerbert Xu } 981fb286bb2SHerbert Xu } 982fb286bb2SHerbert Xu 983fb286bb2SHerbert Xu skb->csum = csum_tcpudp_nofold(skb->nh.iph->saddr, skb->nh.iph->daddr, 984fb286bb2SHerbert Xu skb->len, IPPROTO_TCP, 0); 985fb286bb2SHerbert Xu 986fb286bb2SHerbert Xu if (skb->len <= 76) { 
987fb286bb2SHerbert Xu return __skb_checksum_complete(skb); 9881da177e4SLinus Torvalds } 9891da177e4SLinus Torvalds return 0; 9901da177e4SLinus Torvalds } 9911da177e4SLinus Torvalds 9921da177e4SLinus Torvalds 9931da177e4SLinus Torvalds /* The socket must have it's spinlock held when we get 9941da177e4SLinus Torvalds * here. 9951da177e4SLinus Torvalds * 9961da177e4SLinus Torvalds * We have a potential double-lock case here, so even when 9971da177e4SLinus Torvalds * doing backlog processing we use the BH locking scheme. 9981da177e4SLinus Torvalds * This is because we cannot sleep with the original spinlock 9991da177e4SLinus Torvalds * held. 10001da177e4SLinus Torvalds */ 10011da177e4SLinus Torvalds int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) 10021da177e4SLinus Torvalds { 10031da177e4SLinus Torvalds if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ 10041da177e4SLinus Torvalds TCP_CHECK_TIMER(sk); 10051da177e4SLinus Torvalds if (tcp_rcv_established(sk, skb, skb->h.th, skb->len)) 10061da177e4SLinus Torvalds goto reset; 10071da177e4SLinus Torvalds TCP_CHECK_TIMER(sk); 10081da177e4SLinus Torvalds return 0; 10091da177e4SLinus Torvalds } 10101da177e4SLinus Torvalds 10111da177e4SLinus Torvalds if (skb->len < (skb->h.th->doff << 2) || tcp_checksum_complete(skb)) 10121da177e4SLinus Torvalds goto csum_err; 10131da177e4SLinus Torvalds 10141da177e4SLinus Torvalds if (sk->sk_state == TCP_LISTEN) { 10151da177e4SLinus Torvalds struct sock *nsk = tcp_v4_hnd_req(sk, skb); 10161da177e4SLinus Torvalds if (!nsk) 10171da177e4SLinus Torvalds goto discard; 10181da177e4SLinus Torvalds 10191da177e4SLinus Torvalds if (nsk != sk) { 10201da177e4SLinus Torvalds if (tcp_child_process(sk, nsk, skb)) 10211da177e4SLinus Torvalds goto reset; 10221da177e4SLinus Torvalds return 0; 10231da177e4SLinus Torvalds } 10241da177e4SLinus Torvalds } 10251da177e4SLinus Torvalds 10261da177e4SLinus Torvalds TCP_CHECK_TIMER(sk); 10271da177e4SLinus Torvalds if (tcp_rcv_state_process(sk, skb, 
skb->h.th, skb->len)) 10281da177e4SLinus Torvalds goto reset; 10291da177e4SLinus Torvalds TCP_CHECK_TIMER(sk); 10301da177e4SLinus Torvalds return 0; 10311da177e4SLinus Torvalds 10321da177e4SLinus Torvalds reset: 10331da177e4SLinus Torvalds tcp_v4_send_reset(skb); 10341da177e4SLinus Torvalds discard: 10351da177e4SLinus Torvalds kfree_skb(skb); 10361da177e4SLinus Torvalds /* Be careful here. If this function gets more complicated and 10371da177e4SLinus Torvalds * gcc suffers from register pressure on the x86, sk (in %ebx) 10381da177e4SLinus Torvalds * might be destroyed here. This current version compiles correctly, 10391da177e4SLinus Torvalds * but you have been warned. 10401da177e4SLinus Torvalds */ 10411da177e4SLinus Torvalds return 0; 10421da177e4SLinus Torvalds 10431da177e4SLinus Torvalds csum_err: 10441da177e4SLinus Torvalds TCP_INC_STATS_BH(TCP_MIB_INERRS); 10451da177e4SLinus Torvalds goto discard; 10461da177e4SLinus Torvalds } 10471da177e4SLinus Torvalds 10481da177e4SLinus Torvalds /* 10491da177e4SLinus Torvalds * From tcp_input.c 10501da177e4SLinus Torvalds */ 10511da177e4SLinus Torvalds 10521da177e4SLinus Torvalds int tcp_v4_rcv(struct sk_buff *skb) 10531da177e4SLinus Torvalds { 10541da177e4SLinus Torvalds struct tcphdr *th; 10551da177e4SLinus Torvalds struct sock *sk; 10561da177e4SLinus Torvalds int ret; 10571da177e4SLinus Torvalds 10581da177e4SLinus Torvalds if (skb->pkt_type != PACKET_HOST) 10591da177e4SLinus Torvalds goto discard_it; 10601da177e4SLinus Torvalds 10611da177e4SLinus Torvalds /* Count it even if it's bad */ 10621da177e4SLinus Torvalds TCP_INC_STATS_BH(TCP_MIB_INSEGS); 10631da177e4SLinus Torvalds 10641da177e4SLinus Torvalds if (!pskb_may_pull(skb, sizeof(struct tcphdr))) 10651da177e4SLinus Torvalds goto discard_it; 10661da177e4SLinus Torvalds 10671da177e4SLinus Torvalds th = skb->h.th; 10681da177e4SLinus Torvalds 10691da177e4SLinus Torvalds if (th->doff < sizeof(struct tcphdr) / 4) 10701da177e4SLinus Torvalds goto bad_packet; 
10711da177e4SLinus Torvalds if (!pskb_may_pull(skb, th->doff * 4)) 10721da177e4SLinus Torvalds goto discard_it; 10731da177e4SLinus Torvalds 10741da177e4SLinus Torvalds /* An explanation is required here, I think. 10751da177e4SLinus Torvalds * Packet length and doff are validated by header prediction, 1076caa20d9aSStephen Hemminger * provided case of th->doff==0 is eliminated. 10771da177e4SLinus Torvalds * So, we defer the checks. */ 10781da177e4SLinus Torvalds if ((skb->ip_summed != CHECKSUM_UNNECESSARY && 1079fb286bb2SHerbert Xu tcp_v4_checksum_init(skb))) 10801da177e4SLinus Torvalds goto bad_packet; 10811da177e4SLinus Torvalds 10821da177e4SLinus Torvalds th = skb->h.th; 10831da177e4SLinus Torvalds TCP_SKB_CB(skb)->seq = ntohl(th->seq); 10841da177e4SLinus Torvalds TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + 10851da177e4SLinus Torvalds skb->len - th->doff * 4); 10861da177e4SLinus Torvalds TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); 10871da177e4SLinus Torvalds TCP_SKB_CB(skb)->when = 0; 10881da177e4SLinus Torvalds TCP_SKB_CB(skb)->flags = skb->nh.iph->tos; 10891da177e4SLinus Torvalds TCP_SKB_CB(skb)->sacked = 0; 10901da177e4SLinus Torvalds 1091e48c414eSArnaldo Carvalho de Melo sk = __inet_lookup(&tcp_hashinfo, skb->nh.iph->saddr, th->source, 10921da177e4SLinus Torvalds skb->nh.iph->daddr, ntohs(th->dest), 1093463c84b9SArnaldo Carvalho de Melo inet_iif(skb)); 10941da177e4SLinus Torvalds 10951da177e4SLinus Torvalds if (!sk) 10961da177e4SLinus Torvalds goto no_tcp_socket; 10971da177e4SLinus Torvalds 10981da177e4SLinus Torvalds process: 10991da177e4SLinus Torvalds if (sk->sk_state == TCP_TIME_WAIT) 11001da177e4SLinus Torvalds goto do_time_wait; 11011da177e4SLinus Torvalds 11021da177e4SLinus Torvalds if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) 11031da177e4SLinus Torvalds goto discard_and_relse; 1104b59c2701SPatrick McHardy nf_reset(skb); 11051da177e4SLinus Torvalds 11061da177e4SLinus Torvalds if (sk_filter(sk, skb, 0)) 
11071da177e4SLinus Torvalds goto discard_and_relse; 11081da177e4SLinus Torvalds 11091da177e4SLinus Torvalds skb->dev = NULL; 11101da177e4SLinus Torvalds 1111c6366184SIngo Molnar bh_lock_sock_nested(sk); 11121da177e4SLinus Torvalds ret = 0; 11131da177e4SLinus Torvalds if (!sock_owned_by_user(sk)) { 11141a2449a8SChris Leech #ifdef CONFIG_NET_DMA 11151a2449a8SChris Leech struct tcp_sock *tp = tcp_sk(sk); 11161a2449a8SChris Leech if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list) 11171a2449a8SChris Leech tp->ucopy.dma_chan = get_softnet_dma(); 11181a2449a8SChris Leech if (tp->ucopy.dma_chan) 11191a2449a8SChris Leech ret = tcp_v4_do_rcv(sk, skb); 11201a2449a8SChris Leech else 11211a2449a8SChris Leech #endif 11221a2449a8SChris Leech { 11231da177e4SLinus Torvalds if (!tcp_prequeue(sk, skb)) 11241da177e4SLinus Torvalds ret = tcp_v4_do_rcv(sk, skb); 11251a2449a8SChris Leech } 11261da177e4SLinus Torvalds } else 11271da177e4SLinus Torvalds sk_add_backlog(sk, skb); 11281da177e4SLinus Torvalds bh_unlock_sock(sk); 11291da177e4SLinus Torvalds 11301da177e4SLinus Torvalds sock_put(sk); 11311da177e4SLinus Torvalds 11321da177e4SLinus Torvalds return ret; 11331da177e4SLinus Torvalds 11341da177e4SLinus Torvalds no_tcp_socket: 11351da177e4SLinus Torvalds if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) 11361da177e4SLinus Torvalds goto discard_it; 11371da177e4SLinus Torvalds 11381da177e4SLinus Torvalds if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) { 11391da177e4SLinus Torvalds bad_packet: 11401da177e4SLinus Torvalds TCP_INC_STATS_BH(TCP_MIB_INERRS); 11411da177e4SLinus Torvalds } else { 11421da177e4SLinus Torvalds tcp_v4_send_reset(skb); 11431da177e4SLinus Torvalds } 11441da177e4SLinus Torvalds 11451da177e4SLinus Torvalds discard_it: 11461da177e4SLinus Torvalds /* Discard frame. 
*/ 11471da177e4SLinus Torvalds kfree_skb(skb); 11481da177e4SLinus Torvalds return 0; 11491da177e4SLinus Torvalds 11501da177e4SLinus Torvalds discard_and_relse: 11511da177e4SLinus Torvalds sock_put(sk); 11521da177e4SLinus Torvalds goto discard_it; 11531da177e4SLinus Torvalds 11541da177e4SLinus Torvalds do_time_wait: 11551da177e4SLinus Torvalds if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { 11568feaf0c0SArnaldo Carvalho de Melo inet_twsk_put((struct inet_timewait_sock *) sk); 11571da177e4SLinus Torvalds goto discard_it; 11581da177e4SLinus Torvalds } 11591da177e4SLinus Torvalds 11601da177e4SLinus Torvalds if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) { 11611da177e4SLinus Torvalds TCP_INC_STATS_BH(TCP_MIB_INERRS); 11628feaf0c0SArnaldo Carvalho de Melo inet_twsk_put((struct inet_timewait_sock *) sk); 11631da177e4SLinus Torvalds goto discard_it; 11641da177e4SLinus Torvalds } 11658feaf0c0SArnaldo Carvalho de Melo switch (tcp_timewait_state_process((struct inet_timewait_sock *)sk, 11668feaf0c0SArnaldo Carvalho de Melo skb, th)) { 11671da177e4SLinus Torvalds case TCP_TW_SYN: { 116833b62231SArnaldo Carvalho de Melo struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo, 116933b62231SArnaldo Carvalho de Melo skb->nh.iph->daddr, 11701da177e4SLinus Torvalds ntohs(th->dest), 1171463c84b9SArnaldo Carvalho de Melo inet_iif(skb)); 11721da177e4SLinus Torvalds if (sk2) { 1173295ff7edSArnaldo Carvalho de Melo inet_twsk_deschedule((struct inet_timewait_sock *)sk, 1174295ff7edSArnaldo Carvalho de Melo &tcp_death_row); 11758feaf0c0SArnaldo Carvalho de Melo inet_twsk_put((struct inet_timewait_sock *)sk); 11761da177e4SLinus Torvalds sk = sk2; 11771da177e4SLinus Torvalds goto process; 11781da177e4SLinus Torvalds } 11791da177e4SLinus Torvalds /* Fall through to ACK */ 11801da177e4SLinus Torvalds } 11811da177e4SLinus Torvalds case TCP_TW_ACK: 11821da177e4SLinus Torvalds tcp_v4_timewait_ack(sk, skb); 11831da177e4SLinus Torvalds break; 11841da177e4SLinus Torvalds case 
TCP_TW_RST: 11851da177e4SLinus Torvalds goto no_tcp_socket; 11861da177e4SLinus Torvalds case TCP_TW_SUCCESS:; 11871da177e4SLinus Torvalds } 11881da177e4SLinus Torvalds goto discard_it; 11891da177e4SLinus Torvalds } 11901da177e4SLinus Torvalds 11911da177e4SLinus Torvalds /* VJ's idea. Save last timestamp seen from this destination 11921da177e4SLinus Torvalds * and hold it at least for normal timewait interval to use for duplicate 11931da177e4SLinus Torvalds * segment detection in subsequent connections, before they enter synchronized 11941da177e4SLinus Torvalds * state. 11951da177e4SLinus Torvalds */ 11961da177e4SLinus Torvalds 11971da177e4SLinus Torvalds int tcp_v4_remember_stamp(struct sock *sk) 11981da177e4SLinus Torvalds { 11991da177e4SLinus Torvalds struct inet_sock *inet = inet_sk(sk); 12001da177e4SLinus Torvalds struct tcp_sock *tp = tcp_sk(sk); 12011da177e4SLinus Torvalds struct rtable *rt = (struct rtable *)__sk_dst_get(sk); 12021da177e4SLinus Torvalds struct inet_peer *peer = NULL; 12031da177e4SLinus Torvalds int release_it = 0; 12041da177e4SLinus Torvalds 12051da177e4SLinus Torvalds if (!rt || rt->rt_dst != inet->daddr) { 12061da177e4SLinus Torvalds peer = inet_getpeer(inet->daddr, 1); 12071da177e4SLinus Torvalds release_it = 1; 12081da177e4SLinus Torvalds } else { 12091da177e4SLinus Torvalds if (!rt->peer) 12101da177e4SLinus Torvalds rt_bind_peer(rt, 1); 12111da177e4SLinus Torvalds peer = rt->peer; 12121da177e4SLinus Torvalds } 12131da177e4SLinus Torvalds 12141da177e4SLinus Torvalds if (peer) { 12151da177e4SLinus Torvalds if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 || 12161da177e4SLinus Torvalds (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec && 12171da177e4SLinus Torvalds peer->tcp_ts_stamp <= tp->rx_opt.ts_recent_stamp)) { 12181da177e4SLinus Torvalds peer->tcp_ts_stamp = tp->rx_opt.ts_recent_stamp; 12191da177e4SLinus Torvalds peer->tcp_ts = tp->rx_opt.ts_recent; 12201da177e4SLinus Torvalds } 12211da177e4SLinus Torvalds if (release_it) 
12221da177e4SLinus Torvalds inet_putpeer(peer); 12231da177e4SLinus Torvalds return 1; 12241da177e4SLinus Torvalds } 12251da177e4SLinus Torvalds 12261da177e4SLinus Torvalds return 0; 12271da177e4SLinus Torvalds } 12281da177e4SLinus Torvalds 12298feaf0c0SArnaldo Carvalho de Melo int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw) 12301da177e4SLinus Torvalds { 12318feaf0c0SArnaldo Carvalho de Melo struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1); 12321da177e4SLinus Torvalds 12331da177e4SLinus Torvalds if (peer) { 12348feaf0c0SArnaldo Carvalho de Melo const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); 12358feaf0c0SArnaldo Carvalho de Melo 12368feaf0c0SArnaldo Carvalho de Melo if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 || 12371da177e4SLinus Torvalds (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec && 12388feaf0c0SArnaldo Carvalho de Melo peer->tcp_ts_stamp <= tcptw->tw_ts_recent_stamp)) { 12398feaf0c0SArnaldo Carvalho de Melo peer->tcp_ts_stamp = tcptw->tw_ts_recent_stamp; 12408feaf0c0SArnaldo Carvalho de Melo peer->tcp_ts = tcptw->tw_ts_recent; 12411da177e4SLinus Torvalds } 12421da177e4SLinus Torvalds inet_putpeer(peer); 12431da177e4SLinus Torvalds return 1; 12441da177e4SLinus Torvalds } 12451da177e4SLinus Torvalds 12461da177e4SLinus Torvalds return 0; 12471da177e4SLinus Torvalds } 12481da177e4SLinus Torvalds 12498292a17aSArnaldo Carvalho de Melo struct inet_connection_sock_af_ops ipv4_specific = { 12501da177e4SLinus Torvalds .queue_xmit = ip_queue_xmit, 12511da177e4SLinus Torvalds .send_check = tcp_v4_send_check, 125232519f11SArnaldo Carvalho de Melo .rebuild_header = inet_sk_rebuild_header, 12531da177e4SLinus Torvalds .conn_request = tcp_v4_conn_request, 12541da177e4SLinus Torvalds .syn_recv_sock = tcp_v4_syn_recv_sock, 12551da177e4SLinus Torvalds .remember_stamp = tcp_v4_remember_stamp, 12561da177e4SLinus Torvalds .net_header_len = sizeof(struct iphdr), 12571da177e4SLinus Torvalds .setsockopt = ip_setsockopt, 
12581da177e4SLinus Torvalds .getsockopt = ip_getsockopt, 1259543d9cfeSArnaldo Carvalho de Melo .addr2sockaddr = inet_csk_addr2sockaddr, 1260543d9cfeSArnaldo Carvalho de Melo .sockaddr_len = sizeof(struct sockaddr_in), 12613fdadf7dSDmitry Mishin #ifdef CONFIG_COMPAT 12623fdadf7dSDmitry Mishin .compat_setsockopt = compat_ip_setsockopt, 12633fdadf7dSDmitry Mishin .compat_getsockopt = compat_ip_getsockopt, 12643fdadf7dSDmitry Mishin #endif 12651da177e4SLinus Torvalds }; 12661da177e4SLinus Torvalds 12671da177e4SLinus Torvalds /* NOTE: A lot of things set to zero explicitly by call to 12681da177e4SLinus Torvalds * sk_alloc() so need not be done here. 12691da177e4SLinus Torvalds */ 12701da177e4SLinus Torvalds static int tcp_v4_init_sock(struct sock *sk) 12711da177e4SLinus Torvalds { 12726687e988SArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(sk); 12731da177e4SLinus Torvalds struct tcp_sock *tp = tcp_sk(sk); 12741da177e4SLinus Torvalds 12751da177e4SLinus Torvalds skb_queue_head_init(&tp->out_of_order_queue); 12761da177e4SLinus Torvalds tcp_init_xmit_timers(sk); 12771da177e4SLinus Torvalds tcp_prequeue_init(tp); 12781da177e4SLinus Torvalds 12796687e988SArnaldo Carvalho de Melo icsk->icsk_rto = TCP_TIMEOUT_INIT; 12801da177e4SLinus Torvalds tp->mdev = TCP_TIMEOUT_INIT; 12811da177e4SLinus Torvalds 12821da177e4SLinus Torvalds /* So many TCP implementations out there (incorrectly) count the 12831da177e4SLinus Torvalds * initial SYN frame in their delayed-ACK and congestion control 12841da177e4SLinus Torvalds * algorithms that we must have the following bandaid to talk 12851da177e4SLinus Torvalds * efficiently to them. -DaveM 12861da177e4SLinus Torvalds */ 12871da177e4SLinus Torvalds tp->snd_cwnd = 2; 12881da177e4SLinus Torvalds 12891da177e4SLinus Torvalds /* See draft-stevens-tcpca-spec-01 for discussion of the 12901da177e4SLinus Torvalds * initialization of these values. 
12911da177e4SLinus Torvalds */ 12921da177e4SLinus Torvalds tp->snd_ssthresh = 0x7fffffff; /* Infinity */ 12931da177e4SLinus Torvalds tp->snd_cwnd_clamp = ~0; 1294c1b4a7e6SDavid S. Miller tp->mss_cache = 536; 12951da177e4SLinus Torvalds 12961da177e4SLinus Torvalds tp->reordering = sysctl_tcp_reordering; 12976687e988SArnaldo Carvalho de Melo icsk->icsk_ca_ops = &tcp_init_congestion_ops; 12981da177e4SLinus Torvalds 12991da177e4SLinus Torvalds sk->sk_state = TCP_CLOSE; 13001da177e4SLinus Torvalds 13011da177e4SLinus Torvalds sk->sk_write_space = sk_stream_write_space; 13021da177e4SLinus Torvalds sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); 13031da177e4SLinus Torvalds 13048292a17aSArnaldo Carvalho de Melo icsk->icsk_af_ops = &ipv4_specific; 1305d83d8461SArnaldo Carvalho de Melo icsk->icsk_sync_mss = tcp_sync_mss; 13061da177e4SLinus Torvalds 13071da177e4SLinus Torvalds sk->sk_sndbuf = sysctl_tcp_wmem[1]; 13081da177e4SLinus Torvalds sk->sk_rcvbuf = sysctl_tcp_rmem[1]; 13091da177e4SLinus Torvalds 13101da177e4SLinus Torvalds atomic_inc(&tcp_sockets_allocated); 13111da177e4SLinus Torvalds 13121da177e4SLinus Torvalds return 0; 13131da177e4SLinus Torvalds } 13141da177e4SLinus Torvalds 13151da177e4SLinus Torvalds int tcp_v4_destroy_sock(struct sock *sk) 13161da177e4SLinus Torvalds { 13171da177e4SLinus Torvalds struct tcp_sock *tp = tcp_sk(sk); 13181da177e4SLinus Torvalds 13191da177e4SLinus Torvalds tcp_clear_xmit_timers(sk); 13201da177e4SLinus Torvalds 13216687e988SArnaldo Carvalho de Melo tcp_cleanup_congestion_control(sk); 1322317a76f9SStephen Hemminger 13231da177e4SLinus Torvalds /* Cleanup up the write buffer. */ 13241da177e4SLinus Torvalds sk_stream_writequeue_purge(sk); 13251da177e4SLinus Torvalds 13261da177e4SLinus Torvalds /* Cleans up our, hopefully empty, out_of_order_queue. 
*/ 13271da177e4SLinus Torvalds __skb_queue_purge(&tp->out_of_order_queue); 13281da177e4SLinus Torvalds 13291a2449a8SChris Leech #ifdef CONFIG_NET_DMA 13301a2449a8SChris Leech /* Cleans up our sk_async_wait_queue */ 13311a2449a8SChris Leech __skb_queue_purge(&sk->sk_async_wait_queue); 13321a2449a8SChris Leech #endif 13331a2449a8SChris Leech 13341da177e4SLinus Torvalds /* Clean prequeue, it must be empty really */ 13351da177e4SLinus Torvalds __skb_queue_purge(&tp->ucopy.prequeue); 13361da177e4SLinus Torvalds 13371da177e4SLinus Torvalds /* Clean up a referenced TCP bind bucket. */ 1338463c84b9SArnaldo Carvalho de Melo if (inet_csk(sk)->icsk_bind_hash) 13392d8c4ce5SArnaldo Carvalho de Melo inet_put_port(&tcp_hashinfo, sk); 13401da177e4SLinus Torvalds 13411da177e4SLinus Torvalds /* 13421da177e4SLinus Torvalds * If sendmsg cached page exists, toss it. 13431da177e4SLinus Torvalds */ 13441da177e4SLinus Torvalds if (sk->sk_sndmsg_page) { 13451da177e4SLinus Torvalds __free_page(sk->sk_sndmsg_page); 13461da177e4SLinus Torvalds sk->sk_sndmsg_page = NULL; 13471da177e4SLinus Torvalds } 13481da177e4SLinus Torvalds 13491da177e4SLinus Torvalds atomic_dec(&tcp_sockets_allocated); 13501da177e4SLinus Torvalds 13511da177e4SLinus Torvalds return 0; 13521da177e4SLinus Torvalds } 13531da177e4SLinus Torvalds 13541da177e4SLinus Torvalds EXPORT_SYMBOL(tcp_v4_destroy_sock); 13551da177e4SLinus Torvalds 13561da177e4SLinus Torvalds #ifdef CONFIG_PROC_FS 13571da177e4SLinus Torvalds /* Proc filesystem TCP sock list dumping. */ 13581da177e4SLinus Torvalds 13598feaf0c0SArnaldo Carvalho de Melo static inline struct inet_timewait_sock *tw_head(struct hlist_head *head) 13601da177e4SLinus Torvalds { 13611da177e4SLinus Torvalds return hlist_empty(head) ? 
/* Advance the /proc iterator through the listening hash.
 *
 * @seq: seq_file whose ->private is the struct tcp_iter_state.
 * @cur: entry returned by the previous call, or NULL to start from the
 *       head of listening bucket 0.
 *
 * Walks listening sockets and, for each listener with a non-empty request
 * queue, the entries of its SYN table.  What @cur / the return value points
 * at depends on st->state: a struct request_sock in TCP_SEQ_STATE_OPENREQ,
 * a struct sock otherwise.
 *
 * Returns the next matching entry, or NULL once all INET_LHTABLE_SIZE
 * buckets are exhausted.  When a request_sock is returned, the
 * syn_wait_lock of st->syn_wait_sk is still read-held; it is dropped either
 * here on a later call (when that listener's SYN table is exhausted) or in
 * tcp_seq_stop().
 */
static void *listening_get_next(struct seq_file *seq, void *cur)
{
	struct inet_connection_sock *icsk;
	struct hlist_node *node;
	struct sock *sk = cur;
	struct tcp_iter_state* st = seq->private;

	/* First call: start at the head of bucket 0 (st->num not bumped). */
	if (!sk) {
		st->bucket = 0;
		sk = sk_head(&tcp_hashinfo.listening_hash[0]);
		goto get_sk;
	}

	/* One more entry has been handed out. */
	++st->num;

	if (st->state == TCP_SEQ_STATE_OPENREQ) {
		/* Previous entry was an open request: continue in the SYN
		 * table of the listener recorded in st->syn_wait_sk.  Its
		 * syn_wait_lock is still held from the start_req path.
		 */
		struct request_sock *req = cur;

		icsk = inet_csk(st->syn_wait_sk);
		req = req->dl_next;
		while (1) {
			/* Rest of the current SYN hash chain. */
			while (req) {
				if (req->rsk_ops->family == st->family) {
					cur = req;
					goto out;	/* lock stays held */
				}
				req = req->dl_next;
			}
			if (++st->sbucket >= TCP_SYNQ_HSIZE)
				break;
get_req:
			/* Also entered from start_req below with sbucket == 0. */
			req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
		}
		/* SYN table exhausted: move on to the next listener. */
		sk	  = sk_next(st->syn_wait_sk);
		st->state = TCP_SEQ_STATE_LISTENING;
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
	} else {
		/* Previous entry was a listener: visit its pending requests
		 * (if any) before stepping to the next socket.
		 */
		icsk = inet_csk(sk);
		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		if (reqsk_queue_len(&icsk->icsk_accept_queue))
			goto start_req;	/* jumps into the loop below, lock held */
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		sk = sk_next(sk);
	}
get_sk:
	sk_for_each_from(sk, node) {
		if (sk->sk_family == st->family) {
			cur = sk;
			goto out;
		}
		/* Family mismatch: the listener itself is not reported, but
		 * its request queue is still scanned for matching requests.
		 */
		icsk = inet_csk(sk);
		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
start_req:
			/* Switch to open-request mode for this listener and
			 * jump into the SYN-table loop above.
			 */
			st->uid		= sock_i_uid(sk);
			st->syn_wait_sk = sk;
			st->state	= TCP_SEQ_STATE_OPENREQ;
			st->sbucket	= 0;
			goto get_req;
		}
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
	}
	/* Chain exhausted: try the next listening bucket. */
	if (++st->bucket < INET_LHTABLE_SIZE) {
		sk = sk_head(&tcp_hashinfo.listening_hash[st->bucket]);
		goto get_sk;
	}
	cur = NULL;
out:
	return cur;
}
/* Advance the /proc iterator through the established hash.
 *
 * @seq: seq_file whose ->private is the struct tcp_iter_state.
 * @cur: entry returned by the previous call: a struct inet_timewait_sock
 *       when st->state is TCP_SEQ_STATE_TIME_WAIT, a struct sock otherwise.
 *
 * For each bucket the regular chain ehash[bucket] is walked first, then the
 * time-wait chain stored at ehash[bucket + ehash_size]; both are walked
 * while holding ehash[bucket].lock for reading.
 *
 * Returns the next entry whose family matches st->family, with the lock of
 * bucket st->bucket still read-held, or NULL (no lock held) after the last
 * bucket.
 */
static void *established_get_next(struct seq_file *seq, void *cur)
{
	struct sock *sk = cur;
	struct inet_timewait_sock *tw;
	struct hlist_node *node;
	struct tcp_iter_state* st = seq->private;

	/* One more entry has been handed out. */
	++st->num;

	if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
		tw = cur;
		tw = tw_next(tw);
get_tw:
		/* Also entered from below with tw at the head of the
		 * time-wait chain of the current bucket.
		 */
		while (tw && tw->tw_family != st->family) {
			tw = tw_next(tw);
		}
		if (tw) {
			cur = tw;
			goto out;	/* bucket lock stays held */
		}
		/* Both chains of this bucket are done: release it. */
		read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
		st->state = TCP_SEQ_STATE_ESTABLISHED;

		/* We can reschedule between buckets: */
		cond_resched_softirq();

		if (++st->bucket < tcp_hashinfo.ehash_size) {
			read_lock(&tcp_hashinfo.ehash[st->bucket].lock);
			sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain);
		} else {
			cur = NULL;
			goto out;
		}
	} else
		sk = sk_next(sk);

	/* Regular chain of the current bucket. */
	sk_for_each_from(sk, node) {
		if (sk->sk_family == st->family)
			goto found;
	}

	/* Regular chain exhausted: continue with the time-wait chain of the
	 * same bucket.
	 */
	st->state = TCP_SEQ_STATE_TIME_WAIT;
	tw = tw_head(&tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain);
	goto get_tw;
found:
	cur = sk;
out:
	return cur;
}
TCP_SEQ_STATE_LISTENING; 15581da177e4SLinus Torvalds rc = listening_get_idx(seq, &pos); 15591da177e4SLinus Torvalds 15601da177e4SLinus Torvalds if (!rc) { 1561f3f05f70SArnaldo Carvalho de Melo inet_listen_unlock(&tcp_hashinfo); 15621da177e4SLinus Torvalds local_bh_disable(); 15631da177e4SLinus Torvalds st->state = TCP_SEQ_STATE_ESTABLISHED; 15641da177e4SLinus Torvalds rc = established_get_idx(seq, pos); 15651da177e4SLinus Torvalds } 15661da177e4SLinus Torvalds 15671da177e4SLinus Torvalds return rc; 15681da177e4SLinus Torvalds } 15691da177e4SLinus Torvalds 15701da177e4SLinus Torvalds static void *tcp_seq_start(struct seq_file *seq, loff_t *pos) 15711da177e4SLinus Torvalds { 15721da177e4SLinus Torvalds struct tcp_iter_state* st = seq->private; 15731da177e4SLinus Torvalds st->state = TCP_SEQ_STATE_LISTENING; 15741da177e4SLinus Torvalds st->num = 0; 15751da177e4SLinus Torvalds return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; 15761da177e4SLinus Torvalds } 15771da177e4SLinus Torvalds 15781da177e4SLinus Torvalds static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) 15791da177e4SLinus Torvalds { 15801da177e4SLinus Torvalds void *rc = NULL; 15811da177e4SLinus Torvalds struct tcp_iter_state* st; 15821da177e4SLinus Torvalds 15831da177e4SLinus Torvalds if (v == SEQ_START_TOKEN) { 15841da177e4SLinus Torvalds rc = tcp_get_idx(seq, 0); 15851da177e4SLinus Torvalds goto out; 15861da177e4SLinus Torvalds } 15871da177e4SLinus Torvalds st = seq->private; 15881da177e4SLinus Torvalds 15891da177e4SLinus Torvalds switch (st->state) { 15901da177e4SLinus Torvalds case TCP_SEQ_STATE_OPENREQ: 15911da177e4SLinus Torvalds case TCP_SEQ_STATE_LISTENING: 15921da177e4SLinus Torvalds rc = listening_get_next(seq, v); 15931da177e4SLinus Torvalds if (!rc) { 1594f3f05f70SArnaldo Carvalho de Melo inet_listen_unlock(&tcp_hashinfo); 15951da177e4SLinus Torvalds local_bh_disable(); 15961da177e4SLinus Torvalds st->state = TCP_SEQ_STATE_ESTABLISHED; 15971da177e4SLinus Torvalds rc = 
established_get_first(seq); 15981da177e4SLinus Torvalds } 15991da177e4SLinus Torvalds break; 16001da177e4SLinus Torvalds case TCP_SEQ_STATE_ESTABLISHED: 16011da177e4SLinus Torvalds case TCP_SEQ_STATE_TIME_WAIT: 16021da177e4SLinus Torvalds rc = established_get_next(seq, v); 16031da177e4SLinus Torvalds break; 16041da177e4SLinus Torvalds } 16051da177e4SLinus Torvalds out: 16061da177e4SLinus Torvalds ++*pos; 16071da177e4SLinus Torvalds return rc; 16081da177e4SLinus Torvalds } 16091da177e4SLinus Torvalds 16101da177e4SLinus Torvalds static void tcp_seq_stop(struct seq_file *seq, void *v) 16111da177e4SLinus Torvalds { 16121da177e4SLinus Torvalds struct tcp_iter_state* st = seq->private; 16131da177e4SLinus Torvalds 16141da177e4SLinus Torvalds switch (st->state) { 16151da177e4SLinus Torvalds case TCP_SEQ_STATE_OPENREQ: 16161da177e4SLinus Torvalds if (v) { 1617463c84b9SArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk); 1618463c84b9SArnaldo Carvalho de Melo read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); 16191da177e4SLinus Torvalds } 16201da177e4SLinus Torvalds case TCP_SEQ_STATE_LISTENING: 16211da177e4SLinus Torvalds if (v != SEQ_START_TOKEN) 1622f3f05f70SArnaldo Carvalho de Melo inet_listen_unlock(&tcp_hashinfo); 16231da177e4SLinus Torvalds break; 16241da177e4SLinus Torvalds case TCP_SEQ_STATE_TIME_WAIT: 16251da177e4SLinus Torvalds case TCP_SEQ_STATE_ESTABLISHED: 16261da177e4SLinus Torvalds if (v) 16276e04e021SArnaldo Carvalho de Melo read_unlock(&tcp_hashinfo.ehash[st->bucket].lock); 16281da177e4SLinus Torvalds local_bh_enable(); 16291da177e4SLinus Torvalds break; 16301da177e4SLinus Torvalds } 16311da177e4SLinus Torvalds } 16321da177e4SLinus Torvalds 16331da177e4SLinus Torvalds static int tcp_seq_open(struct inode *inode, struct file *file) 16341da177e4SLinus Torvalds { 16351da177e4SLinus Torvalds struct tcp_seq_afinfo *afinfo = PDE(inode)->data; 16361da177e4SLinus Torvalds struct seq_file *seq; 16371da177e4SLinus Torvalds 
struct tcp_iter_state *s; 16381da177e4SLinus Torvalds int rc; 16391da177e4SLinus Torvalds 16401da177e4SLinus Torvalds if (unlikely(afinfo == NULL)) 16411da177e4SLinus Torvalds return -EINVAL; 16421da177e4SLinus Torvalds 16431da177e4SLinus Torvalds s = kmalloc(sizeof(*s), GFP_KERNEL); 16441da177e4SLinus Torvalds if (!s) 16451da177e4SLinus Torvalds return -ENOMEM; 16461da177e4SLinus Torvalds memset(s, 0, sizeof(*s)); 16471da177e4SLinus Torvalds s->family = afinfo->family; 16481da177e4SLinus Torvalds s->seq_ops.start = tcp_seq_start; 16491da177e4SLinus Torvalds s->seq_ops.next = tcp_seq_next; 16501da177e4SLinus Torvalds s->seq_ops.show = afinfo->seq_show; 16511da177e4SLinus Torvalds s->seq_ops.stop = tcp_seq_stop; 16521da177e4SLinus Torvalds 16531da177e4SLinus Torvalds rc = seq_open(file, &s->seq_ops); 16541da177e4SLinus Torvalds if (rc) 16551da177e4SLinus Torvalds goto out_kfree; 16561da177e4SLinus Torvalds seq = file->private_data; 16571da177e4SLinus Torvalds seq->private = s; 16581da177e4SLinus Torvalds out: 16591da177e4SLinus Torvalds return rc; 16601da177e4SLinus Torvalds out_kfree: 16611da177e4SLinus Torvalds kfree(s); 16621da177e4SLinus Torvalds goto out; 16631da177e4SLinus Torvalds } 16641da177e4SLinus Torvalds 16651da177e4SLinus Torvalds int tcp_proc_register(struct tcp_seq_afinfo *afinfo) 16661da177e4SLinus Torvalds { 16671da177e4SLinus Torvalds int rc = 0; 16681da177e4SLinus Torvalds struct proc_dir_entry *p; 16691da177e4SLinus Torvalds 16701da177e4SLinus Torvalds if (!afinfo) 16711da177e4SLinus Torvalds return -EINVAL; 16721da177e4SLinus Torvalds afinfo->seq_fops->owner = afinfo->owner; 16731da177e4SLinus Torvalds afinfo->seq_fops->open = tcp_seq_open; 16741da177e4SLinus Torvalds afinfo->seq_fops->read = seq_read; 16751da177e4SLinus Torvalds afinfo->seq_fops->llseek = seq_lseek; 16761da177e4SLinus Torvalds afinfo->seq_fops->release = seq_release_private; 16771da177e4SLinus Torvalds 16781da177e4SLinus Torvalds p = proc_net_fops_create(afinfo->name, S_IRUGO, 
afinfo->seq_fops); 16791da177e4SLinus Torvalds if (p) 16801da177e4SLinus Torvalds p->data = afinfo; 16811da177e4SLinus Torvalds else 16821da177e4SLinus Torvalds rc = -ENOMEM; 16831da177e4SLinus Torvalds return rc; 16841da177e4SLinus Torvalds } 16851da177e4SLinus Torvalds 16861da177e4SLinus Torvalds void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo) 16871da177e4SLinus Torvalds { 16881da177e4SLinus Torvalds if (!afinfo) 16891da177e4SLinus Torvalds return; 16901da177e4SLinus Torvalds proc_net_remove(afinfo->name); 16911da177e4SLinus Torvalds memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops)); 16921da177e4SLinus Torvalds } 16931da177e4SLinus Torvalds 169460236fddSArnaldo Carvalho de Melo static void get_openreq4(struct sock *sk, struct request_sock *req, 16951da177e4SLinus Torvalds char *tmpbuf, int i, int uid) 16961da177e4SLinus Torvalds { 16972e6599cbSArnaldo Carvalho de Melo const struct inet_request_sock *ireq = inet_rsk(req); 16981da177e4SLinus Torvalds int ttd = req->expires - jiffies; 16991da177e4SLinus Torvalds 17001da177e4SLinus Torvalds sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X" 17011da177e4SLinus Torvalds " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p", 17021da177e4SLinus Torvalds i, 17032e6599cbSArnaldo Carvalho de Melo ireq->loc_addr, 17041da177e4SLinus Torvalds ntohs(inet_sk(sk)->sport), 17052e6599cbSArnaldo Carvalho de Melo ireq->rmt_addr, 17062e6599cbSArnaldo Carvalho de Melo ntohs(ireq->rmt_port), 17071da177e4SLinus Torvalds TCP_SYN_RECV, 17081da177e4SLinus Torvalds 0, 0, /* could print option size, but that is af dependent. 
*/ 17091da177e4SLinus Torvalds 1, /* timers active (only the expire timer) */ 17101da177e4SLinus Torvalds jiffies_to_clock_t(ttd), 17111da177e4SLinus Torvalds req->retrans, 17121da177e4SLinus Torvalds uid, 17131da177e4SLinus Torvalds 0, /* non standard timer */ 17141da177e4SLinus Torvalds 0, /* open_requests have no inode */ 17151da177e4SLinus Torvalds atomic_read(&sk->sk_refcnt), 17161da177e4SLinus Torvalds req); 17171da177e4SLinus Torvalds } 17181da177e4SLinus Torvalds 17191da177e4SLinus Torvalds static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i) 17201da177e4SLinus Torvalds { 17211da177e4SLinus Torvalds int timer_active; 17221da177e4SLinus Torvalds unsigned long timer_expires; 17231da177e4SLinus Torvalds struct tcp_sock *tp = tcp_sk(sp); 1724463c84b9SArnaldo Carvalho de Melo const struct inet_connection_sock *icsk = inet_csk(sp); 17251da177e4SLinus Torvalds struct inet_sock *inet = inet_sk(sp); 17261da177e4SLinus Torvalds unsigned int dest = inet->daddr; 17271da177e4SLinus Torvalds unsigned int src = inet->rcv_saddr; 17281da177e4SLinus Torvalds __u16 destp = ntohs(inet->dport); 17291da177e4SLinus Torvalds __u16 srcp = ntohs(inet->sport); 17301da177e4SLinus Torvalds 1731463c84b9SArnaldo Carvalho de Melo if (icsk->icsk_pending == ICSK_TIME_RETRANS) { 17321da177e4SLinus Torvalds timer_active = 1; 1733463c84b9SArnaldo Carvalho de Melo timer_expires = icsk->icsk_timeout; 1734463c84b9SArnaldo Carvalho de Melo } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { 17351da177e4SLinus Torvalds timer_active = 4; 1736463c84b9SArnaldo Carvalho de Melo timer_expires = icsk->icsk_timeout; 17371da177e4SLinus Torvalds } else if (timer_pending(&sp->sk_timer)) { 17381da177e4SLinus Torvalds timer_active = 2; 17391da177e4SLinus Torvalds timer_expires = sp->sk_timer.expires; 17401da177e4SLinus Torvalds } else { 17411da177e4SLinus Torvalds timer_active = 0; 17421da177e4SLinus Torvalds timer_expires = jiffies; 17431da177e4SLinus Torvalds } 17441da177e4SLinus Torvalds 
17451da177e4SLinus Torvalds sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX " 17461da177e4SLinus Torvalds "%08X %5d %8d %lu %d %p %u %u %u %u %d", 17471da177e4SLinus Torvalds i, src, srcp, dest, destp, sp->sk_state, 174847da8ee6SSridhar Samudrala tp->write_seq - tp->snd_una, 174947da8ee6SSridhar Samudrala (sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq), 17501da177e4SLinus Torvalds timer_active, 17511da177e4SLinus Torvalds jiffies_to_clock_t(timer_expires - jiffies), 1752463c84b9SArnaldo Carvalho de Melo icsk->icsk_retransmits, 17531da177e4SLinus Torvalds sock_i_uid(sp), 17546687e988SArnaldo Carvalho de Melo icsk->icsk_probes_out, 17551da177e4SLinus Torvalds sock_i_ino(sp), 17561da177e4SLinus Torvalds atomic_read(&sp->sk_refcnt), sp, 1757463c84b9SArnaldo Carvalho de Melo icsk->icsk_rto, 1758463c84b9SArnaldo Carvalho de Melo icsk->icsk_ack.ato, 1759463c84b9SArnaldo Carvalho de Melo (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, 17601da177e4SLinus Torvalds tp->snd_cwnd, 17611da177e4SLinus Torvalds tp->snd_ssthresh >= 0xFFFF ? 
-1 : tp->snd_ssthresh); 17621da177e4SLinus Torvalds } 17631da177e4SLinus Torvalds 17648feaf0c0SArnaldo Carvalho de Melo static void get_timewait4_sock(struct inet_timewait_sock *tw, char *tmpbuf, int i) 17651da177e4SLinus Torvalds { 17661da177e4SLinus Torvalds unsigned int dest, src; 17671da177e4SLinus Torvalds __u16 destp, srcp; 17681da177e4SLinus Torvalds int ttd = tw->tw_ttd - jiffies; 17691da177e4SLinus Torvalds 17701da177e4SLinus Torvalds if (ttd < 0) 17711da177e4SLinus Torvalds ttd = 0; 17721da177e4SLinus Torvalds 17731da177e4SLinus Torvalds dest = tw->tw_daddr; 17741da177e4SLinus Torvalds src = tw->tw_rcv_saddr; 17751da177e4SLinus Torvalds destp = ntohs(tw->tw_dport); 17761da177e4SLinus Torvalds srcp = ntohs(tw->tw_sport); 17771da177e4SLinus Torvalds 17781da177e4SLinus Torvalds sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X" 17791da177e4SLinus Torvalds " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p", 17801da177e4SLinus Torvalds i, src, srcp, dest, destp, tw->tw_substate, 0, 0, 17811da177e4SLinus Torvalds 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0, 17821da177e4SLinus Torvalds atomic_read(&tw->tw_refcnt), tw); 17831da177e4SLinus Torvalds } 17841da177e4SLinus Torvalds 17851da177e4SLinus Torvalds #define TMPSZ 150 17861da177e4SLinus Torvalds 17871da177e4SLinus Torvalds static int tcp4_seq_show(struct seq_file *seq, void *v) 17881da177e4SLinus Torvalds { 17891da177e4SLinus Torvalds struct tcp_iter_state* st; 17901da177e4SLinus Torvalds char tmpbuf[TMPSZ + 1]; 17911da177e4SLinus Torvalds 17921da177e4SLinus Torvalds if (v == SEQ_START_TOKEN) { 17931da177e4SLinus Torvalds seq_printf(seq, "%-*s\n", TMPSZ - 1, 17941da177e4SLinus Torvalds " sl local_address rem_address st tx_queue " 17951da177e4SLinus Torvalds "rx_queue tr tm->when retrnsmt uid timeout " 17961da177e4SLinus Torvalds "inode"); 17971da177e4SLinus Torvalds goto out; 17981da177e4SLinus Torvalds } 17991da177e4SLinus Torvalds st = seq->private; 18001da177e4SLinus Torvalds 18011da177e4SLinus Torvalds switch 
(st->state) { 18021da177e4SLinus Torvalds case TCP_SEQ_STATE_LISTENING: 18031da177e4SLinus Torvalds case TCP_SEQ_STATE_ESTABLISHED: 18041da177e4SLinus Torvalds get_tcp4_sock(v, tmpbuf, st->num); 18051da177e4SLinus Torvalds break; 18061da177e4SLinus Torvalds case TCP_SEQ_STATE_OPENREQ: 18071da177e4SLinus Torvalds get_openreq4(st->syn_wait_sk, v, tmpbuf, st->num, st->uid); 18081da177e4SLinus Torvalds break; 18091da177e4SLinus Torvalds case TCP_SEQ_STATE_TIME_WAIT: 18101da177e4SLinus Torvalds get_timewait4_sock(v, tmpbuf, st->num); 18111da177e4SLinus Torvalds break; 18121da177e4SLinus Torvalds } 18131da177e4SLinus Torvalds seq_printf(seq, "%-*s\n", TMPSZ - 1, tmpbuf); 18141da177e4SLinus Torvalds out: 18151da177e4SLinus Torvalds return 0; 18161da177e4SLinus Torvalds } 18171da177e4SLinus Torvalds 18181da177e4SLinus Torvalds static struct file_operations tcp4_seq_fops; 18191da177e4SLinus Torvalds static struct tcp_seq_afinfo tcp4_seq_afinfo = { 18201da177e4SLinus Torvalds .owner = THIS_MODULE, 18211da177e4SLinus Torvalds .name = "tcp", 18221da177e4SLinus Torvalds .family = AF_INET, 18231da177e4SLinus Torvalds .seq_show = tcp4_seq_show, 18241da177e4SLinus Torvalds .seq_fops = &tcp4_seq_fops, 18251da177e4SLinus Torvalds }; 18261da177e4SLinus Torvalds 18271da177e4SLinus Torvalds int __init tcp4_proc_init(void) 18281da177e4SLinus Torvalds { 18291da177e4SLinus Torvalds return tcp_proc_register(&tcp4_seq_afinfo); 18301da177e4SLinus Torvalds } 18311da177e4SLinus Torvalds 18321da177e4SLinus Torvalds void tcp4_proc_exit(void) 18331da177e4SLinus Torvalds { 18341da177e4SLinus Torvalds tcp_proc_unregister(&tcp4_seq_afinfo); 18351da177e4SLinus Torvalds } 18361da177e4SLinus Torvalds #endif /* CONFIG_PROC_FS */ 18371da177e4SLinus Torvalds 18381da177e4SLinus Torvalds struct proto tcp_prot = { 18391da177e4SLinus Torvalds .name = "TCP", 18401da177e4SLinus Torvalds .owner = THIS_MODULE, 18411da177e4SLinus Torvalds .close = tcp_close, 18421da177e4SLinus Torvalds .connect = tcp_v4_connect, 
18431da177e4SLinus Torvalds .disconnect = tcp_disconnect, 1844463c84b9SArnaldo Carvalho de Melo .accept = inet_csk_accept, 18451da177e4SLinus Torvalds .ioctl = tcp_ioctl, 18461da177e4SLinus Torvalds .init = tcp_v4_init_sock, 18471da177e4SLinus Torvalds .destroy = tcp_v4_destroy_sock, 18481da177e4SLinus Torvalds .shutdown = tcp_shutdown, 18491da177e4SLinus Torvalds .setsockopt = tcp_setsockopt, 18501da177e4SLinus Torvalds .getsockopt = tcp_getsockopt, 18511da177e4SLinus Torvalds .sendmsg = tcp_sendmsg, 18521da177e4SLinus Torvalds .recvmsg = tcp_recvmsg, 18531da177e4SLinus Torvalds .backlog_rcv = tcp_v4_do_rcv, 18541da177e4SLinus Torvalds .hash = tcp_v4_hash, 18551da177e4SLinus Torvalds .unhash = tcp_unhash, 18561da177e4SLinus Torvalds .get_port = tcp_v4_get_port, 18571da177e4SLinus Torvalds .enter_memory_pressure = tcp_enter_memory_pressure, 18581da177e4SLinus Torvalds .sockets_allocated = &tcp_sockets_allocated, 18590a5578cfSArnaldo Carvalho de Melo .orphan_count = &tcp_orphan_count, 18601da177e4SLinus Torvalds .memory_allocated = &tcp_memory_allocated, 18611da177e4SLinus Torvalds .memory_pressure = &tcp_memory_pressure, 18621da177e4SLinus Torvalds .sysctl_mem = sysctl_tcp_mem, 18631da177e4SLinus Torvalds .sysctl_wmem = sysctl_tcp_wmem, 18641da177e4SLinus Torvalds .sysctl_rmem = sysctl_tcp_rmem, 18651da177e4SLinus Torvalds .max_header = MAX_TCP_HEADER, 18661da177e4SLinus Torvalds .obj_size = sizeof(struct tcp_sock), 18676d6ee43eSArnaldo Carvalho de Melo .twsk_prot = &tcp_timewait_sock_ops, 186860236fddSArnaldo Carvalho de Melo .rsk_prot = &tcp_request_sock_ops, 1869543d9cfeSArnaldo Carvalho de Melo #ifdef CONFIG_COMPAT 1870543d9cfeSArnaldo Carvalho de Melo .compat_setsockopt = compat_tcp_setsockopt, 1871543d9cfeSArnaldo Carvalho de Melo .compat_getsockopt = compat_tcp_getsockopt, 1872543d9cfeSArnaldo Carvalho de Melo #endif 18731da177e4SLinus Torvalds }; 18741da177e4SLinus Torvalds 18751da177e4SLinus Torvalds void __init tcp_v4_init(struct net_proto_family *ops) 
18761da177e4SLinus Torvalds { 1877c4d93909SArnaldo Carvalho de Melo if (inet_csk_ctl_sock_create(&tcp_socket, PF_INET, SOCK_RAW, IPPROTO_TCP) < 0) 18781da177e4SLinus Torvalds panic("Failed to create the TCP control socket.\n"); 18791da177e4SLinus Torvalds } 18801da177e4SLinus Torvalds 18811da177e4SLinus Torvalds EXPORT_SYMBOL(ipv4_specific); 18821da177e4SLinus Torvalds EXPORT_SYMBOL(tcp_hashinfo); 18831da177e4SLinus Torvalds EXPORT_SYMBOL(tcp_prot); 18841da177e4SLinus Torvalds EXPORT_SYMBOL(tcp_unhash); 18851da177e4SLinus Torvalds EXPORT_SYMBOL(tcp_v4_conn_request); 18861da177e4SLinus Torvalds EXPORT_SYMBOL(tcp_v4_connect); 18871da177e4SLinus Torvalds EXPORT_SYMBOL(tcp_v4_do_rcv); 18881da177e4SLinus Torvalds EXPORT_SYMBOL(tcp_v4_remember_stamp); 18891da177e4SLinus Torvalds EXPORT_SYMBOL(tcp_v4_send_check); 18901da177e4SLinus Torvalds EXPORT_SYMBOL(tcp_v4_syn_recv_sock); 18911da177e4SLinus Torvalds 18921da177e4SLinus Torvalds #ifdef CONFIG_PROC_FS 18931da177e4SLinus Torvalds EXPORT_SYMBOL(tcp_proc_register); 18941da177e4SLinus Torvalds EXPORT_SYMBOL(tcp_proc_unregister); 18951da177e4SLinus Torvalds #endif 18961da177e4SLinus Torvalds EXPORT_SYMBOL(sysctl_local_port_range); 18971da177e4SLinus Torvalds EXPORT_SYMBOL(sysctl_tcp_low_latency); 18981da177e4SLinus Torvalds 1899