11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * INET An implementation of the TCP/IP protocol suite for the LINUX 31da177e4SLinus Torvalds * operating system. INET is implemented using the BSD Socket 41da177e4SLinus Torvalds * interface as the means of communication with the user level. 51da177e4SLinus Torvalds * 61da177e4SLinus Torvalds * Implementation of the Transmission Control Protocol(TCP). 71da177e4SLinus Torvalds * 81da177e4SLinus Torvalds * Version: $Id: tcp_ipv4.c,v 1.240 2002/02/01 22:01:04 davem Exp $ 91da177e4SLinus Torvalds * 101da177e4SLinus Torvalds * IPv4 specific functions 111da177e4SLinus Torvalds * 121da177e4SLinus Torvalds * 131da177e4SLinus Torvalds * code split from: 141da177e4SLinus Torvalds * linux/ipv4/tcp.c 151da177e4SLinus Torvalds * linux/ipv4/tcp_input.c 161da177e4SLinus Torvalds * linux/ipv4/tcp_output.c 171da177e4SLinus Torvalds * 181da177e4SLinus Torvalds * See tcp.c for author information 191da177e4SLinus Torvalds * 201da177e4SLinus Torvalds * This program is free software; you can redistribute it and/or 211da177e4SLinus Torvalds * modify it under the terms of the GNU General Public License 221da177e4SLinus Torvalds * as published by the Free Software Foundation; either version 231da177e4SLinus Torvalds * 2 of the License, or (at your option) any later version. 241da177e4SLinus Torvalds */ 251da177e4SLinus Torvalds 261da177e4SLinus Torvalds /* 271da177e4SLinus Torvalds * Changes: 281da177e4SLinus Torvalds * David S. Miller : New socket lookup architecture. 291da177e4SLinus Torvalds * This code is dedicated to John Dyson. 301da177e4SLinus Torvalds * David S. Miller : Change semantics of established hash, 311da177e4SLinus Torvalds * half is devoted to TIME_WAIT sockets 321da177e4SLinus Torvalds * and the rest go in the other half. 331da177e4SLinus Torvalds * Andi Kleen : Add support for syncookies and fixed 341da177e4SLinus Torvalds * some bugs: ip options weren't passed to 351da177e4SLinus Torvalds * the TCP layer, missed a check for an 361da177e4SLinus Torvalds * ACK bit. 371da177e4SLinus Torvalds * Andi Kleen : Implemented fast path mtu discovery. 381da177e4SLinus Torvalds * Fixed many serious bugs in the 3960236fddSArnaldo Carvalho de Melo * request_sock handling and moved 401da177e4SLinus Torvalds * most of it into the af independent code. 411da177e4SLinus Torvalds * Added tail drop and some other bugfixes. 42caa20d9aSStephen Hemminger * Added new listen semantics. 431da177e4SLinus Torvalds * Mike McLagan : Routing by source 441da177e4SLinus Torvalds * Juan Jose Ciarlante: ip_dynaddr bits 451da177e4SLinus Torvalds * Andi Kleen: various fixes. 461da177e4SLinus Torvalds * Vitaly E. Lavrov : Transparent proxy revived after year 471da177e4SLinus Torvalds * coma. 481da177e4SLinus Torvalds * Andi Kleen : Fix new listen. 491da177e4SLinus Torvalds * Andi Kleen : Fix accept error reporting. 501da177e4SLinus Torvalds * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which 511da177e4SLinus Torvalds * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind 521da177e4SLinus Torvalds * a single port at the same time. 531da177e4SLinus Torvalds */ 541da177e4SLinus Torvalds 551da177e4SLinus Torvalds 561da177e4SLinus Torvalds #include <linux/types.h> 571da177e4SLinus Torvalds #include <linux/fcntl.h> 581da177e4SLinus Torvalds #include <linux/module.h> 591da177e4SLinus Torvalds #include <linux/random.h> 601da177e4SLinus Torvalds #include <linux/cache.h> 611da177e4SLinus Torvalds #include <linux/jhash.h> 621da177e4SLinus Torvalds #include <linux/init.h> 631da177e4SLinus Torvalds #include <linux/times.h> 641da177e4SLinus Torvalds 651da177e4SLinus Torvalds #include <net/icmp.h> 66304a1618SArnaldo Carvalho de Melo #include <net/inet_hashtables.h> 671da177e4SLinus Torvalds #include <net/tcp.h> 6820380731SArnaldo Carvalho de Melo #include <net/transp_v6.h> 691da177e4SLinus Torvalds #include <net/ipv6.h> 701da177e4SLinus Torvalds #include <net/inet_common.h> 716d6ee43eSArnaldo Carvalho de Melo #include <net/timewait_sock.h> 721da177e4SLinus Torvalds #include <net/xfrm.h> 731a2449a8SChris Leech #include <net/netdma.h> 741da177e4SLinus Torvalds 751da177e4SLinus Torvalds #include <linux/inet.h> 761da177e4SLinus Torvalds #include <linux/ipv6.h> 771da177e4SLinus Torvalds #include <linux/stddef.h> 781da177e4SLinus Torvalds #include <linux/proc_fs.h> 791da177e4SLinus Torvalds #include <linux/seq_file.h> 801da177e4SLinus Torvalds 81cfb6eeb4SYOSHIFUJI Hideaki #include <linux/crypto.h> 82cfb6eeb4SYOSHIFUJI Hideaki #include <linux/scatterlist.h> 83cfb6eeb4SYOSHIFUJI Hideaki 84ab32ea5dSBrian Haley int sysctl_tcp_tw_reuse __read_mostly; 85ab32ea5dSBrian Haley int sysctl_tcp_low_latency __read_mostly; 861da177e4SLinus Torvalds 871da177e4SLinus Torvalds /* Check TCP sequence numbers in ICMP packets. */ 881da177e4SLinus Torvalds #define ICMP_MIN_LENGTH 8 891da177e4SLinus Torvalds 901da177e4SLinus Torvalds /* Socket used for sending RSTs */ 911da177e4SLinus Torvalds static struct socket *tcp_socket; 921da177e4SLinus Torvalds 938292a17aSArnaldo Carvalho de Melo void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb); 941da177e4SLinus Torvalds 95cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 967174259eSArnaldo Carvalho de Melo static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, 977174259eSArnaldo Carvalho de Melo __be32 addr); 98cfb6eeb4SYOSHIFUJI Hideaki static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key, 997174259eSArnaldo Carvalho de Melo __be32 saddr, __be32 daddr, 1007174259eSArnaldo Carvalho de Melo struct tcphdr *th, int protocol, 1017174259eSArnaldo Carvalho de Melo int tcplen); 102cfb6eeb4SYOSHIFUJI Hideaki #endif 103cfb6eeb4SYOSHIFUJI Hideaki 1040f7ff927SArnaldo Carvalho de Melo struct inet_hashinfo __cacheline_aligned tcp_hashinfo = { 105e4d91918SIngo Molnar .lhash_lock = __RW_LOCK_UNLOCKED(tcp_hashinfo.lhash_lock), 1060f7ff927SArnaldo Carvalho de Melo .lhash_users = ATOMIC_INIT(0), 1070f7ff927SArnaldo Carvalho de Melo .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait), 1081da177e4SLinus Torvalds }; 1091da177e4SLinus Torvalds 110463c84b9SArnaldo Carvalho de Melo static int tcp_v4_get_port(struct sock *sk, unsigned short snum) 111463c84b9SArnaldo Carvalho de Melo { 112971af18bSArnaldo Carvalho de Melo return inet_csk_get_port(&tcp_hashinfo, sk, snum, 113971af18bSArnaldo Carvalho de Melo inet_csk_bind_conflict); 114463c84b9SArnaldo Carvalho de Melo } 115463c84b9SArnaldo Carvalho de Melo 1161da177e4SLinus Torvalds static void tcp_v4_hash(struct sock *sk) 1171da177e4SLinus Torvalds { 11881849d10SArnaldo Carvalho de Melo inet_hash(&tcp_hashinfo, sk); 1191da177e4SLinus Torvalds } 1201da177e4SLinus Torvalds 1211da177e4SLinus Torvalds void tcp_unhash(struct sock *sk) 1221da177e4SLinus Torvalds { 12381849d10SArnaldo Carvalho de Melo inet_unhash(&tcp_hashinfo, sk); 1241da177e4SLinus Torvalds } 1251da177e4SLinus Torvalds 126a94f723dSGerrit Renker static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb) 1271da177e4SLinus Torvalds { 128eddc9ec5SArnaldo Carvalho de Melo return secure_tcp_sequence_number(ip_hdr(skb)->daddr, 129eddc9ec5SArnaldo Carvalho de Melo ip_hdr(skb)->saddr, 1301da177e4SLinus Torvalds skb->h.th->dest, 1311da177e4SLinus Torvalds skb->h.th->source); 1321da177e4SLinus Torvalds } 1331da177e4SLinus Torvalds 1346d6ee43eSArnaldo Carvalho de Melo int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) 1356d6ee43eSArnaldo Carvalho de Melo { 1366d6ee43eSArnaldo Carvalho de Melo const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw); 1376d6ee43eSArnaldo Carvalho de Melo struct tcp_sock *tp = tcp_sk(sk); 1386d6ee43eSArnaldo Carvalho de Melo 1396d6ee43eSArnaldo Carvalho de Melo /* With PAWS, it is safe from the viewpoint 1406d6ee43eSArnaldo Carvalho de Melo of data integrity. Even without PAWS it is safe provided sequence 1416d6ee43eSArnaldo Carvalho de Melo spaces do not overlap i.e. at data rates <= 80Mbit/sec. 1426d6ee43eSArnaldo Carvalho de Melo 1436d6ee43eSArnaldo Carvalho de Melo Actually, the idea is close to VJ's one, only timestamp cache is 1446d6ee43eSArnaldo Carvalho de Melo held not per host, but per port pair and TW bucket is used as state 1456d6ee43eSArnaldo Carvalho de Melo holder. 1466d6ee43eSArnaldo Carvalho de Melo 1476d6ee43eSArnaldo Carvalho de Melo If TW bucket has been already destroyed we fall back to VJ's scheme 1486d6ee43eSArnaldo Carvalho de Melo and use initial timestamp retrieved from peer table. 1496d6ee43eSArnaldo Carvalho de Melo */ 1506d6ee43eSArnaldo Carvalho de Melo if (tcptw->tw_ts_recent_stamp && 1516d6ee43eSArnaldo Carvalho de Melo (twp == NULL || (sysctl_tcp_tw_reuse && 1529d729f72SJames Morris get_seconds() - tcptw->tw_ts_recent_stamp > 1))) { 1536d6ee43eSArnaldo Carvalho de Melo tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2; 1546d6ee43eSArnaldo Carvalho de Melo if (tp->write_seq == 0) 1556d6ee43eSArnaldo Carvalho de Melo tp->write_seq = 1; 1566d6ee43eSArnaldo Carvalho de Melo tp->rx_opt.ts_recent = tcptw->tw_ts_recent; 1576d6ee43eSArnaldo Carvalho de Melo tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; 1586d6ee43eSArnaldo Carvalho de Melo sock_hold(sktw); 1596d6ee43eSArnaldo Carvalho de Melo return 1; 1606d6ee43eSArnaldo Carvalho de Melo } 1616d6ee43eSArnaldo Carvalho de Melo 1626d6ee43eSArnaldo Carvalho de Melo return 0; 1636d6ee43eSArnaldo Carvalho de Melo } 1646d6ee43eSArnaldo Carvalho de Melo 1656d6ee43eSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(tcp_twsk_unique); 1666d6ee43eSArnaldo Carvalho de Melo 1671da177e4SLinus Torvalds /* This will initiate an outgoing connection. */ 1681da177e4SLinus Torvalds int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) 1691da177e4SLinus Torvalds { 1701da177e4SLinus Torvalds struct inet_sock *inet = inet_sk(sk); 1711da177e4SLinus Torvalds struct tcp_sock *tp = tcp_sk(sk); 1721da177e4SLinus Torvalds struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; 1731da177e4SLinus Torvalds struct rtable *rt; 174bada8adcSAl Viro __be32 daddr, nexthop; 1751da177e4SLinus Torvalds int tmp; 1761da177e4SLinus Torvalds int err; 1771da177e4SLinus Torvalds 1781da177e4SLinus Torvalds if (addr_len < sizeof(struct sockaddr_in)) 1791da177e4SLinus Torvalds return -EINVAL; 1801da177e4SLinus Torvalds 1811da177e4SLinus Torvalds if (usin->sin_family != AF_INET) 1821da177e4SLinus Torvalds return -EAFNOSUPPORT; 1831da177e4SLinus Torvalds 1841da177e4SLinus Torvalds nexthop = daddr = usin->sin_addr.s_addr; 1851da177e4SLinus Torvalds if (inet->opt && inet->opt->srr) { 1861da177e4SLinus Torvalds if (!daddr) 1871da177e4SLinus Torvalds return -EINVAL; 1881da177e4SLinus Torvalds nexthop = inet->opt->faddr; 1891da177e4SLinus Torvalds } 1901da177e4SLinus Torvalds 1911da177e4SLinus Torvalds tmp = ip_route_connect(&rt, nexthop, inet->saddr, 1921da177e4SLinus Torvalds RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, 1931da177e4SLinus Torvalds IPPROTO_TCP, 1948eb9086fSDavid S. Miller inet->sport, usin->sin_port, sk, 1); 1951da177e4SLinus Torvalds if (tmp < 0) 1961da177e4SLinus Torvalds return tmp; 1971da177e4SLinus Torvalds 1981da177e4SLinus Torvalds if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) { 1991da177e4SLinus Torvalds ip_rt_put(rt); 2001da177e4SLinus Torvalds return -ENETUNREACH; 2011da177e4SLinus Torvalds } 2021da177e4SLinus Torvalds 2031da177e4SLinus Torvalds if (!inet->opt || !inet->opt->srr) 2041da177e4SLinus Torvalds daddr = rt->rt_dst; 2051da177e4SLinus Torvalds 2061da177e4SLinus Torvalds if (!inet->saddr) 2071da177e4SLinus Torvalds inet->saddr = rt->rt_src; 2081da177e4SLinus Torvalds inet->rcv_saddr = inet->saddr; 2091da177e4SLinus Torvalds 2101da177e4SLinus Torvalds if (tp->rx_opt.ts_recent_stamp && inet->daddr != daddr) { 2111da177e4SLinus Torvalds /* Reset inherited state */ 2121da177e4SLinus Torvalds tp->rx_opt.ts_recent = 0; 2131da177e4SLinus Torvalds tp->rx_opt.ts_recent_stamp = 0; 2141da177e4SLinus Torvalds tp->write_seq = 0; 2151da177e4SLinus Torvalds } 2161da177e4SLinus Torvalds 217295ff7edSArnaldo Carvalho de Melo if (tcp_death_row.sysctl_tw_recycle && 2181da177e4SLinus Torvalds !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) { 2191da177e4SLinus Torvalds struct inet_peer *peer = rt_get_peer(rt); 2207174259eSArnaldo Carvalho de Melo /* 2217174259eSArnaldo Carvalho de Melo * VJ's idea. We save last timestamp seen from 2227174259eSArnaldo Carvalho de Melo * the destination in peer table, when entering state 2237174259eSArnaldo Carvalho de Melo * TIME-WAIT * and initialize rx_opt.ts_recent from it, 2247174259eSArnaldo Carvalho de Melo * when trying new connection. 2251da177e4SLinus Torvalds */ 2267174259eSArnaldo Carvalho de Melo if (peer != NULL && 2279d729f72SJames Morris peer->tcp_ts_stamp + TCP_PAWS_MSL >= get_seconds()) { 2281da177e4SLinus Torvalds tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp; 2291da177e4SLinus Torvalds tp->rx_opt.ts_recent = peer->tcp_ts; 2301da177e4SLinus Torvalds } 2311da177e4SLinus Torvalds } 2321da177e4SLinus Torvalds 2331da177e4SLinus Torvalds inet->dport = usin->sin_port; 2341da177e4SLinus Torvalds inet->daddr = daddr; 2351da177e4SLinus Torvalds 236d83d8461SArnaldo Carvalho de Melo inet_csk(sk)->icsk_ext_hdr_len = 0; 2371da177e4SLinus Torvalds if (inet->opt) 238d83d8461SArnaldo Carvalho de Melo inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen; 2391da177e4SLinus Torvalds 2401da177e4SLinus Torvalds tp->rx_opt.mss_clamp = 536; 2411da177e4SLinus Torvalds 2421da177e4SLinus Torvalds /* Socket identity is still unknown (sport may be zero). 2431da177e4SLinus Torvalds * However we set state to SYN-SENT and not releasing socket 2441da177e4SLinus Torvalds * lock select source port, enter ourselves into the hash tables and 2451da177e4SLinus Torvalds * complete initialization after this. 2461da177e4SLinus Torvalds */ 2471da177e4SLinus Torvalds tcp_set_state(sk, TCP_SYN_SENT); 248a7f5e7f1SArnaldo Carvalho de Melo err = inet_hash_connect(&tcp_death_row, sk); 2491da177e4SLinus Torvalds if (err) 2501da177e4SLinus Torvalds goto failure; 2511da177e4SLinus Torvalds 2527174259eSArnaldo Carvalho de Melo err = ip_route_newports(&rt, IPPROTO_TCP, 2537174259eSArnaldo Carvalho de Melo inet->sport, inet->dport, sk); 2541da177e4SLinus Torvalds if (err) 2551da177e4SLinus Torvalds goto failure; 2561da177e4SLinus Torvalds 2571da177e4SLinus Torvalds /* OK, now commit destination to socket. */ 258bcd76111SHerbert Xu sk->sk_gso_type = SKB_GSO_TCPV4; 2596cbb0df7SArnaldo Carvalho de Melo sk_setup_caps(sk, &rt->u.dst); 2601da177e4SLinus Torvalds 2611da177e4SLinus Torvalds if (!tp->write_seq) 2621da177e4SLinus Torvalds tp->write_seq = secure_tcp_sequence_number(inet->saddr, 2631da177e4SLinus Torvalds inet->daddr, 2641da177e4SLinus Torvalds inet->sport, 2651da177e4SLinus Torvalds usin->sin_port); 2661da177e4SLinus Torvalds 2671da177e4SLinus Torvalds inet->id = tp->write_seq ^ jiffies; 2681da177e4SLinus Torvalds 2691da177e4SLinus Torvalds err = tcp_connect(sk); 2701da177e4SLinus Torvalds rt = NULL; 2711da177e4SLinus Torvalds if (err) 2721da177e4SLinus Torvalds goto failure; 2731da177e4SLinus Torvalds 2741da177e4SLinus Torvalds return 0; 2751da177e4SLinus Torvalds 2761da177e4SLinus Torvalds failure: 2777174259eSArnaldo Carvalho de Melo /* 2787174259eSArnaldo Carvalho de Melo * This unhashes the socket and releases the local port, 2797174259eSArnaldo Carvalho de Melo * if necessary. 2807174259eSArnaldo Carvalho de Melo */ 2811da177e4SLinus Torvalds tcp_set_state(sk, TCP_CLOSE); 2821da177e4SLinus Torvalds ip_rt_put(rt); 2831da177e4SLinus Torvalds sk->sk_route_caps = 0; 2841da177e4SLinus Torvalds inet->dport = 0; 2851da177e4SLinus Torvalds return err; 2861da177e4SLinus Torvalds } 2871da177e4SLinus Torvalds 2881da177e4SLinus Torvalds /* 2891da177e4SLinus Torvalds * This routine does path mtu discovery as defined in RFC1191. 2901da177e4SLinus Torvalds */ 29140efc6faSStephen Hemminger static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu) 2921da177e4SLinus Torvalds { 2931da177e4SLinus Torvalds struct dst_entry *dst; 2941da177e4SLinus Torvalds struct inet_sock *inet = inet_sk(sk); 2951da177e4SLinus Torvalds 2961da177e4SLinus Torvalds /* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs 2971da177e4SLinus Torvalds * send out by Linux are always <576bytes so they should go through 2981da177e4SLinus Torvalds * unfragmented). 2991da177e4SLinus Torvalds */ 3001da177e4SLinus Torvalds if (sk->sk_state == TCP_LISTEN) 3011da177e4SLinus Torvalds return; 3021da177e4SLinus Torvalds 3031da177e4SLinus Torvalds /* We don't check in the destentry if pmtu discovery is forbidden 3041da177e4SLinus Torvalds * on this route. We just assume that no packet_to_big packets 3051da177e4SLinus Torvalds * are send back when pmtu discovery is not active. 3061da177e4SLinus Torvalds * There is a small race when the user changes this flag in the 3071da177e4SLinus Torvalds * route, but I think that's acceptable. 3081da177e4SLinus Torvalds */ 3091da177e4SLinus Torvalds if ((dst = __sk_dst_check(sk, 0)) == NULL) 3101da177e4SLinus Torvalds return; 3111da177e4SLinus Torvalds 3121da177e4SLinus Torvalds dst->ops->update_pmtu(dst, mtu); 3131da177e4SLinus Torvalds 3141da177e4SLinus Torvalds /* Something is about to be wrong... Remember soft error 3151da177e4SLinus Torvalds * for the case, if this connection will not able to recover. 3161da177e4SLinus Torvalds */ 3171da177e4SLinus Torvalds if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst)) 3181da177e4SLinus Torvalds sk->sk_err_soft = EMSGSIZE; 3191da177e4SLinus Torvalds 3201da177e4SLinus Torvalds mtu = dst_mtu(dst); 3211da177e4SLinus Torvalds 3221da177e4SLinus Torvalds if (inet->pmtudisc != IP_PMTUDISC_DONT && 323d83d8461SArnaldo Carvalho de Melo inet_csk(sk)->icsk_pmtu_cookie > mtu) { 3241da177e4SLinus Torvalds tcp_sync_mss(sk, mtu); 3251da177e4SLinus Torvalds 3261da177e4SLinus Torvalds /* Resend the TCP packet because it's 3271da177e4SLinus Torvalds * clear that the old packet has been 3281da177e4SLinus Torvalds * dropped. This is the new "fast" path mtu 3291da177e4SLinus Torvalds * discovery. 3301da177e4SLinus Torvalds */ 3311da177e4SLinus Torvalds tcp_simple_retransmit(sk); 3321da177e4SLinus Torvalds } /* else let the usual retransmit timer handle it */ 3331da177e4SLinus Torvalds } 3341da177e4SLinus Torvalds 3351da177e4SLinus Torvalds /* 3361da177e4SLinus Torvalds * This routine is called by the ICMP module when it gets some 3371da177e4SLinus Torvalds * sort of error condition. If err < 0 then the socket should 3381da177e4SLinus Torvalds * be closed and the error returned to the user. If err > 0 3391da177e4SLinus Torvalds * it's just the icmp type << 8 | icmp code. After adjustment 3401da177e4SLinus Torvalds * header points to the first 8 bytes of the tcp header. We need 3411da177e4SLinus Torvalds * to find the appropriate port. 3421da177e4SLinus Torvalds * 3431da177e4SLinus Torvalds * The locking strategy used here is very "optimistic". When 3441da177e4SLinus Torvalds * someone else accesses the socket the ICMP is just dropped 3451da177e4SLinus Torvalds * and for some paths there is no check at all. 3461da177e4SLinus Torvalds * A more general error queue to queue errors for later handling 3471da177e4SLinus Torvalds * is probably better. 3481da177e4SLinus Torvalds * 3491da177e4SLinus Torvalds */ 3501da177e4SLinus Torvalds 3511da177e4SLinus Torvalds void tcp_v4_err(struct sk_buff *skb, u32 info) 3521da177e4SLinus Torvalds { 3531da177e4SLinus Torvalds struct iphdr *iph = (struct iphdr *)skb->data; 3541da177e4SLinus Torvalds struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2)); 3551da177e4SLinus Torvalds struct tcp_sock *tp; 3561da177e4SLinus Torvalds struct inet_sock *inet; 35788c7664fSArnaldo Carvalho de Melo const int type = icmp_hdr(skb)->type; 35888c7664fSArnaldo Carvalho de Melo const int code = icmp_hdr(skb)->code; 3591da177e4SLinus Torvalds struct sock *sk; 3601da177e4SLinus Torvalds __u32 seq; 3611da177e4SLinus Torvalds int err; 3621da177e4SLinus Torvalds 3631da177e4SLinus Torvalds if (skb->len < (iph->ihl << 2) + 8) { 3641da177e4SLinus Torvalds ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); 3651da177e4SLinus Torvalds return; 3661da177e4SLinus Torvalds } 3671da177e4SLinus Torvalds 368e48c414eSArnaldo Carvalho de Melo sk = inet_lookup(&tcp_hashinfo, iph->daddr, th->dest, iph->saddr, 369463c84b9SArnaldo Carvalho de Melo th->source, inet_iif(skb)); 3701da177e4SLinus Torvalds if (!sk) { 3711da177e4SLinus Torvalds ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); 3721da177e4SLinus Torvalds return; 3731da177e4SLinus Torvalds } 3741da177e4SLinus Torvalds if (sk->sk_state == TCP_TIME_WAIT) { 3759469c7b4SYOSHIFUJI Hideaki inet_twsk_put(inet_twsk(sk)); 3761da177e4SLinus Torvalds return; 3771da177e4SLinus Torvalds } 3781da177e4SLinus Torvalds 3791da177e4SLinus Torvalds bh_lock_sock(sk); 3801da177e4SLinus Torvalds /* If too many ICMPs get dropped on busy 3811da177e4SLinus Torvalds * servers this needs to be solved differently. 3821da177e4SLinus Torvalds */ 3831da177e4SLinus Torvalds if (sock_owned_by_user(sk)) 3841da177e4SLinus Torvalds NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS); 3851da177e4SLinus Torvalds 3861da177e4SLinus Torvalds if (sk->sk_state == TCP_CLOSE) 3871da177e4SLinus Torvalds goto out; 3881da177e4SLinus Torvalds 3891da177e4SLinus Torvalds tp = tcp_sk(sk); 3901da177e4SLinus Torvalds seq = ntohl(th->seq); 3911da177e4SLinus Torvalds if (sk->sk_state != TCP_LISTEN && 3921da177e4SLinus Torvalds !between(seq, tp->snd_una, tp->snd_nxt)) { 39306ca719fSEric Dumazet NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS); 3941da177e4SLinus Torvalds goto out; 3951da177e4SLinus Torvalds } 3961da177e4SLinus Torvalds 3971da177e4SLinus Torvalds switch (type) { 3981da177e4SLinus Torvalds case ICMP_SOURCE_QUENCH: 3991da177e4SLinus Torvalds /* Just silently ignore these. */ 4001da177e4SLinus Torvalds goto out; 4011da177e4SLinus Torvalds case ICMP_PARAMETERPROB: 4021da177e4SLinus Torvalds err = EPROTO; 4031da177e4SLinus Torvalds break; 4041da177e4SLinus Torvalds case ICMP_DEST_UNREACH: 4051da177e4SLinus Torvalds if (code > NR_ICMP_UNREACH) 4061da177e4SLinus Torvalds goto out; 4071da177e4SLinus Torvalds 4081da177e4SLinus Torvalds if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */ 4091da177e4SLinus Torvalds if (!sock_owned_by_user(sk)) 4101da177e4SLinus Torvalds do_pmtu_discovery(sk, iph, info); 4111da177e4SLinus Torvalds goto out; 4121da177e4SLinus Torvalds } 4131da177e4SLinus Torvalds 4141da177e4SLinus Torvalds err = icmp_err_convert[code].errno; 4151da177e4SLinus Torvalds break; 4161da177e4SLinus Torvalds case ICMP_TIME_EXCEEDED: 4171da177e4SLinus Torvalds err = EHOSTUNREACH; 4181da177e4SLinus Torvalds break; 4191da177e4SLinus Torvalds default: 4201da177e4SLinus Torvalds goto out; 4211da177e4SLinus Torvalds } 4221da177e4SLinus Torvalds 4231da177e4SLinus Torvalds switch (sk->sk_state) { 42460236fddSArnaldo Carvalho de Melo struct request_sock *req, **prev; 4251da177e4SLinus Torvalds case TCP_LISTEN: 4261da177e4SLinus Torvalds if (sock_owned_by_user(sk)) 4271da177e4SLinus Torvalds goto out; 4281da177e4SLinus Torvalds 429463c84b9SArnaldo Carvalho de Melo req = inet_csk_search_req(sk, &prev, th->dest, 4301da177e4SLinus Torvalds iph->daddr, iph->saddr); 4311da177e4SLinus Torvalds if (!req) 4321da177e4SLinus Torvalds goto out; 4331da177e4SLinus Torvalds 4341da177e4SLinus Torvalds /* ICMPs are not backlogged, hence we cannot get 4351da177e4SLinus Torvalds an established socket here. 4361da177e4SLinus Torvalds */ 4371da177e4SLinus Torvalds BUG_TRAP(!req->sk); 4381da177e4SLinus Torvalds 4392e6599cbSArnaldo Carvalho de Melo if (seq != tcp_rsk(req)->snt_isn) { 4401da177e4SLinus Torvalds NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS); 4411da177e4SLinus Torvalds goto out; 4421da177e4SLinus Torvalds } 4431da177e4SLinus Torvalds 4441da177e4SLinus Torvalds /* 4451da177e4SLinus Torvalds * Still in SYN_RECV, just remove it silently. 4461da177e4SLinus Torvalds * There is no good way to pass the error to the newly 4471da177e4SLinus Torvalds * created socket, and POSIX does not want network 4481da177e4SLinus Torvalds * errors returned from accept(). 4491da177e4SLinus Torvalds */ 450463c84b9SArnaldo Carvalho de Melo inet_csk_reqsk_queue_drop(sk, req, prev); 4511da177e4SLinus Torvalds goto out; 4521da177e4SLinus Torvalds 4531da177e4SLinus Torvalds case TCP_SYN_SENT: 4541da177e4SLinus Torvalds case TCP_SYN_RECV: /* Cannot happen. 4551da177e4SLinus Torvalds It can f.e. if SYNs crossed. 4561da177e4SLinus Torvalds */ 4571da177e4SLinus Torvalds if (!sock_owned_by_user(sk)) { 4581da177e4SLinus Torvalds sk->sk_err = err; 4591da177e4SLinus Torvalds 4601da177e4SLinus Torvalds sk->sk_error_report(sk); 4611da177e4SLinus Torvalds 4621da177e4SLinus Torvalds tcp_done(sk); 4631da177e4SLinus Torvalds } else { 4641da177e4SLinus Torvalds sk->sk_err_soft = err; 4651da177e4SLinus Torvalds } 4661da177e4SLinus Torvalds goto out; 4671da177e4SLinus Torvalds } 4681da177e4SLinus Torvalds 4691da177e4SLinus Torvalds /* If we've already connected we will keep trying 4701da177e4SLinus Torvalds * until we time out, or the user gives up. 4711da177e4SLinus Torvalds * 4721da177e4SLinus Torvalds * rfc1122 4.2.3.9 allows to consider as hard errors 4731da177e4SLinus Torvalds * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too, 4741da177e4SLinus Torvalds * but it is obsoleted by pmtu discovery). 4751da177e4SLinus Torvalds * 4761da177e4SLinus Torvalds * Note, that in modern internet, where routing is unreliable 4771da177e4SLinus Torvalds * and in each dark corner broken firewalls sit, sending random 4781da177e4SLinus Torvalds * errors ordered by their masters even this two messages finally lose 4791da177e4SLinus Torvalds * their original sense (even Linux sends invalid PORT_UNREACHs) 4801da177e4SLinus Torvalds * 4811da177e4SLinus Torvalds * Now we are in compliance with RFCs. 4821da177e4SLinus Torvalds * --ANK (980905) 4831da177e4SLinus Torvalds */ 4841da177e4SLinus Torvalds 4851da177e4SLinus Torvalds inet = inet_sk(sk); 4861da177e4SLinus Torvalds if (!sock_owned_by_user(sk) && inet->recverr) { 4871da177e4SLinus Torvalds sk->sk_err = err; 4881da177e4SLinus Torvalds sk->sk_error_report(sk); 4891da177e4SLinus Torvalds } else { /* Only an error on timeout */ 4901da177e4SLinus Torvalds sk->sk_err_soft = err; 4911da177e4SLinus Torvalds } 4921da177e4SLinus Torvalds 4931da177e4SLinus Torvalds out: 4941da177e4SLinus Torvalds bh_unlock_sock(sk); 4951da177e4SLinus Torvalds sock_put(sk); 4961da177e4SLinus Torvalds } 4971da177e4SLinus Torvalds 4981da177e4SLinus Torvalds /* This routine computes an IPv4 TCP checksum. */ 4998292a17aSArnaldo Carvalho de Melo void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb) 5001da177e4SLinus Torvalds { 5011da177e4SLinus Torvalds struct inet_sock *inet = inet_sk(sk); 5028292a17aSArnaldo Carvalho de Melo struct tcphdr *th = skb->h.th; 5031da177e4SLinus Torvalds 50484fa7933SPatrick McHardy if (skb->ip_summed == CHECKSUM_PARTIAL) { 505ba7808eaSFrederik Deweerdt th->check = ~tcp_v4_check(len, inet->saddr, 506ba7808eaSFrederik Deweerdt inet->daddr, 0); 507ff1dcadbSAl Viro skb->csum_offset = offsetof(struct tcphdr, check); 5081da177e4SLinus Torvalds } else { 509ba7808eaSFrederik Deweerdt th->check = tcp_v4_check(len, inet->saddr, inet->daddr, 5101da177e4SLinus Torvalds csum_partial((char *)th, 5111da177e4SLinus Torvalds th->doff << 2, 5121da177e4SLinus Torvalds skb->csum)); 5131da177e4SLinus Torvalds } 5141da177e4SLinus Torvalds } 5151da177e4SLinus Torvalds 516a430a43dSHerbert Xu int tcp_v4_gso_send_check(struct sk_buff *skb) 517a430a43dSHerbert Xu { 518eddc9ec5SArnaldo Carvalho de Melo const struct iphdr *iph; 519a430a43dSHerbert Xu struct tcphdr *th; 520a430a43dSHerbert Xu 521a430a43dSHerbert Xu if (!pskb_may_pull(skb, sizeof(*th))) 522a430a43dSHerbert Xu return -EINVAL; 523a430a43dSHerbert Xu 524eddc9ec5SArnaldo Carvalho de Melo iph = ip_hdr(skb); 525a430a43dSHerbert Xu th = skb->h.th; 526a430a43dSHerbert Xu 527a430a43dSHerbert Xu th->check = 0; 528ba7808eaSFrederik Deweerdt th->check = ~tcp_v4_check(skb->len, iph->saddr, iph->daddr, 0); 529ff1dcadbSAl Viro skb->csum_offset = offsetof(struct tcphdr, check); 53084fa7933SPatrick McHardy skb->ip_summed = CHECKSUM_PARTIAL; 531a430a43dSHerbert Xu return 0; 532a430a43dSHerbert Xu } 533a430a43dSHerbert Xu 5341da177e4SLinus Torvalds /* 5351da177e4SLinus Torvalds * This routine will send an RST to the other tcp. 5361da177e4SLinus Torvalds * 5371da177e4SLinus Torvalds * Someone asks: why I NEVER use socket parameters (TOS, TTL etc.) 5381da177e4SLinus Torvalds * for reset. 5391da177e4SLinus Torvalds * Answer: if a packet caused RST, it is not for a socket 5401da177e4SLinus Torvalds * existing in our system, if it is matched to a socket, 5411da177e4SLinus Torvalds * it is just duplicate segment or bug in other side's TCP. 5421da177e4SLinus Torvalds * So that we build reply only basing on parameters 5431da177e4SLinus Torvalds * arrived with segment. 5441da177e4SLinus Torvalds * Exception: precedence violation. We do not implement it in any case. 5451da177e4SLinus Torvalds */ 5461da177e4SLinus Torvalds 547cfb6eeb4SYOSHIFUJI Hideaki static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) 5481da177e4SLinus Torvalds { 5491da177e4SLinus Torvalds struct tcphdr *th = skb->h.th; 550cfb6eeb4SYOSHIFUJI Hideaki struct { 551cfb6eeb4SYOSHIFUJI Hideaki struct tcphdr th; 552cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 553714e85beSAl Viro __be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)]; 554cfb6eeb4SYOSHIFUJI Hideaki #endif 555cfb6eeb4SYOSHIFUJI Hideaki } rep; 5561da177e4SLinus Torvalds struct ip_reply_arg arg; 557cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 558cfb6eeb4SYOSHIFUJI Hideaki struct tcp_md5sig_key *key; 559cfb6eeb4SYOSHIFUJI Hideaki #endif 5601da177e4SLinus Torvalds 5611da177e4SLinus Torvalds /* Never send a reset in response to a reset. */ 5621da177e4SLinus Torvalds if (th->rst) 5631da177e4SLinus Torvalds return; 5641da177e4SLinus Torvalds 5651da177e4SLinus Torvalds if (((struct rtable *)skb->dst)->rt_type != RTN_LOCAL) 5661da177e4SLinus Torvalds return; 5671da177e4SLinus Torvalds 5681da177e4SLinus Torvalds /* Swap the send and the receive. */ 569cfb6eeb4SYOSHIFUJI Hideaki memset(&rep, 0, sizeof(rep)); 570cfb6eeb4SYOSHIFUJI Hideaki rep.th.dest = th->source; 571cfb6eeb4SYOSHIFUJI Hideaki rep.th.source = th->dest; 572cfb6eeb4SYOSHIFUJI Hideaki rep.th.doff = sizeof(struct tcphdr) / 4; 573cfb6eeb4SYOSHIFUJI Hideaki rep.th.rst = 1; 5741da177e4SLinus Torvalds 5751da177e4SLinus Torvalds if (th->ack) { 576cfb6eeb4SYOSHIFUJI Hideaki rep.th.seq = th->ack_seq; 5771da177e4SLinus Torvalds } else { 578cfb6eeb4SYOSHIFUJI Hideaki rep.th.ack = 1; 579cfb6eeb4SYOSHIFUJI Hideaki rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin + 5801da177e4SLinus Torvalds skb->len - (th->doff << 2)); 5811da177e4SLinus Torvalds } 5821da177e4SLinus Torvalds 5837174259eSArnaldo Carvalho de Melo memset(&arg, 0, sizeof(arg)); 584cfb6eeb4SYOSHIFUJI Hideaki arg.iov[0].iov_base = (unsigned char *)&rep; 585cfb6eeb4SYOSHIFUJI Hideaki arg.iov[0].iov_len = sizeof(rep.th); 586cfb6eeb4SYOSHIFUJI Hideaki 587cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 588eddc9ec5SArnaldo Carvalho de Melo key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr) : NULL; 589cfb6eeb4SYOSHIFUJI Hideaki if (key) { 590cfb6eeb4SYOSHIFUJI Hideaki rep.opt[0] = htonl((TCPOPT_NOP << 24) | 591cfb6eeb4SYOSHIFUJI Hideaki (TCPOPT_NOP << 16) | 592cfb6eeb4SYOSHIFUJI Hideaki (TCPOPT_MD5SIG << 8) | 593cfb6eeb4SYOSHIFUJI Hideaki TCPOLEN_MD5SIG); 594cfb6eeb4SYOSHIFUJI Hideaki /* Update length and the length the header thinks exists */ 595cfb6eeb4SYOSHIFUJI Hideaki arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED; 596cfb6eeb4SYOSHIFUJI Hideaki rep.th.doff = arg.iov[0].iov_len / 4; 597cfb6eeb4SYOSHIFUJI Hideaki 598cfb6eeb4SYOSHIFUJI Hideaki tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[1], 599cfb6eeb4SYOSHIFUJI Hideaki key, 600eddc9ec5SArnaldo Carvalho de Melo ip_hdr(skb)->daddr, 601eddc9ec5SArnaldo Carvalho de Melo ip_hdr(skb)->saddr, 602cfb6eeb4SYOSHIFUJI Hideaki &rep.th, IPPROTO_TCP, 603cfb6eeb4SYOSHIFUJI Hideaki arg.iov[0].iov_len); 604cfb6eeb4SYOSHIFUJI Hideaki } 605cfb6eeb4SYOSHIFUJI Hideaki #endif 606eddc9ec5SArnaldo Carvalho de Melo arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr, 607eddc9ec5SArnaldo Carvalho de Melo ip_hdr(skb)->saddr, /* XXX */ 6081da177e4SLinus Torvalds sizeof(struct tcphdr), IPPROTO_TCP, 0); 6091da177e4SLinus Torvalds arg.csumoffset = offsetof(struct tcphdr, check) / 2; 6101da177e4SLinus Torvalds 611cfb6eeb4SYOSHIFUJI Hideaki ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len); 6121da177e4SLinus Torvalds 6131da177e4SLinus Torvalds TCP_INC_STATS_BH(TCP_MIB_OUTSEGS); 6141da177e4SLinus Torvalds TCP_INC_STATS_BH(TCP_MIB_OUTRSTS); 6151da177e4SLinus Torvalds } 6161da177e4SLinus Torvalds 6171da177e4SLinus Torvalds /* The code following below sending ACKs in SYN-RECV and TIME-WAIT states 6181da177e4SLinus Torvalds outside socket context is ugly, certainly. What can I do? 6191da177e4SLinus Torvalds */ 6201da177e4SLinus Torvalds 621cfb6eeb4SYOSHIFUJI Hideaki static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk, 622cfb6eeb4SYOSHIFUJI Hideaki struct sk_buff *skb, u32 seq, u32 ack, 6231da177e4SLinus Torvalds u32 win, u32 ts) 6241da177e4SLinus Torvalds { 6251da177e4SLinus Torvalds struct tcphdr *th = skb->h.th; 6261da177e4SLinus Torvalds struct { 6271da177e4SLinus Torvalds struct tcphdr th; 628714e85beSAl Viro __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2) 629cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 630cfb6eeb4SYOSHIFUJI Hideaki + (TCPOLEN_MD5SIG_ALIGNED >> 2) 631cfb6eeb4SYOSHIFUJI Hideaki #endif 632cfb6eeb4SYOSHIFUJI Hideaki ]; 6331da177e4SLinus Torvalds } rep; 6341da177e4SLinus Torvalds struct ip_reply_arg arg; 635cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 636cfb6eeb4SYOSHIFUJI Hideaki struct tcp_md5sig_key *key; 637cfb6eeb4SYOSHIFUJI Hideaki struct tcp_md5sig_key tw_key; 638cfb6eeb4SYOSHIFUJI Hideaki #endif 6391da177e4SLinus Torvalds 6401da177e4SLinus Torvalds memset(&rep.th, 0, sizeof(struct tcphdr)); 6417174259eSArnaldo Carvalho de Melo memset(&arg, 0, sizeof(arg)); 6421da177e4SLinus Torvalds 6431da177e4SLinus Torvalds arg.iov[0].iov_base = (unsigned char *)&rep; 6441da177e4SLinus Torvalds arg.iov[0].iov_len = sizeof(rep.th); 6451da177e4SLinus Torvalds if (ts) { 646cfb6eeb4SYOSHIFUJI Hideaki rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | 6471da177e4SLinus Torvalds (TCPOPT_TIMESTAMP << 8) | 6481da177e4SLinus Torvalds TCPOLEN_TIMESTAMP); 649cfb6eeb4SYOSHIFUJI Hideaki rep.opt[1] = htonl(tcp_time_stamp); 650cfb6eeb4SYOSHIFUJI Hideaki rep.opt[2] = htonl(ts); 651cb48cfe8SCraig Schlenter arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED; 6521da177e4SLinus Torvalds } 6531da177e4SLinus Torvalds 6541da177e4SLinus Torvalds /* Swap the send and the receive. */ 6551da177e4SLinus Torvalds rep.th.dest = th->source; 6561da177e4SLinus Torvalds rep.th.source = th->dest; 6571da177e4SLinus Torvalds rep.th.doff = arg.iov[0].iov_len / 4; 6581da177e4SLinus Torvalds rep.th.seq = htonl(seq); 6591da177e4SLinus Torvalds rep.th.ack_seq = htonl(ack); 6601da177e4SLinus Torvalds rep.th.ack = 1; 6611da177e4SLinus Torvalds rep.th.window = htons(win); 6621da177e4SLinus Torvalds 663cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 664cfb6eeb4SYOSHIFUJI Hideaki /* 665cfb6eeb4SYOSHIFUJI Hideaki * The SKB holds an imcoming packet, but may not have a valid ->sk 666cfb6eeb4SYOSHIFUJI Hideaki * pointer. This is especially the case when we're dealing with a 667cfb6eeb4SYOSHIFUJI Hideaki * TIME_WAIT ack, because the sk structure is long gone, and only 668cfb6eeb4SYOSHIFUJI Hideaki * the tcp_timewait_sock remains. So the md5 key is stashed in that 669cfb6eeb4SYOSHIFUJI Hideaki * structure, and we use it in preference. I believe that (twsk || 670cfb6eeb4SYOSHIFUJI Hideaki * skb->sk) holds true, but we program defensively. 671cfb6eeb4SYOSHIFUJI Hideaki */ 672cfb6eeb4SYOSHIFUJI Hideaki if (!twsk && skb->sk) { 673eddc9ec5SArnaldo Carvalho de Melo key = tcp_v4_md5_do_lookup(skb->sk, ip_hdr(skb)->daddr); 674cfb6eeb4SYOSHIFUJI Hideaki } else if (twsk && twsk->tw_md5_keylen) { 675cfb6eeb4SYOSHIFUJI Hideaki tw_key.key = twsk->tw_md5_key; 676cfb6eeb4SYOSHIFUJI Hideaki tw_key.keylen = twsk->tw_md5_keylen; 677cfb6eeb4SYOSHIFUJI Hideaki key = &tw_key; 6787174259eSArnaldo Carvalho de Melo } else 679cfb6eeb4SYOSHIFUJI Hideaki key = NULL; 680cfb6eeb4SYOSHIFUJI Hideaki 681cfb6eeb4SYOSHIFUJI Hideaki if (key) { 682cfb6eeb4SYOSHIFUJI Hideaki int offset = (ts) ? 3 : 0; 683cfb6eeb4SYOSHIFUJI Hideaki 684cfb6eeb4SYOSHIFUJI Hideaki rep.opt[offset++] = htonl((TCPOPT_NOP << 24) | 685cfb6eeb4SYOSHIFUJI Hideaki (TCPOPT_NOP << 16) | 686cfb6eeb4SYOSHIFUJI Hideaki (TCPOPT_MD5SIG << 8) | 687cfb6eeb4SYOSHIFUJI Hideaki TCPOLEN_MD5SIG); 688cfb6eeb4SYOSHIFUJI Hideaki arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED; 689cfb6eeb4SYOSHIFUJI Hideaki rep.th.doff = arg.iov[0].iov_len/4; 690cfb6eeb4SYOSHIFUJI Hideaki 691cfb6eeb4SYOSHIFUJI Hideaki tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[offset], 692cfb6eeb4SYOSHIFUJI Hideaki key, 693eddc9ec5SArnaldo Carvalho de Melo ip_hdr(skb)->daddr, 694eddc9ec5SArnaldo Carvalho de Melo ip_hdr(skb)->saddr, 695cfb6eeb4SYOSHIFUJI Hideaki &rep.th, IPPROTO_TCP, 696cfb6eeb4SYOSHIFUJI Hideaki arg.iov[0].iov_len); 697cfb6eeb4SYOSHIFUJI Hideaki } 698cfb6eeb4SYOSHIFUJI Hideaki #endif 699eddc9ec5SArnaldo Carvalho de Melo arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr, 700eddc9ec5SArnaldo Carvalho de Melo ip_hdr(skb)->saddr, /* XXX */ 7011da177e4SLinus Torvalds arg.iov[0].iov_len, IPPROTO_TCP, 0); 7021da177e4SLinus Torvalds arg.csumoffset = offsetof(struct tcphdr, check) / 2; 7031da177e4SLinus Torvalds 7041da177e4SLinus Torvalds ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len); 7051da177e4SLinus Torvalds 7061da177e4SLinus Torvalds TCP_INC_STATS_BH(TCP_MIB_OUTSEGS); 7071da177e4SLinus Torvalds } 7081da177e4SLinus Torvalds 7091da177e4SLinus Torvalds static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) 7101da177e4SLinus Torvalds { 7118feaf0c0SArnaldo Carvalho de Melo struct inet_timewait_sock *tw = inet_twsk(sk); 712cfb6eeb4SYOSHIFUJI Hideaki struct tcp_timewait_sock *tcptw = tcp_twsk(sk); 7131da177e4SLinus Torvalds 714cfb6eeb4SYOSHIFUJI Hideaki tcp_v4_send_ack(tcptw, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, 7157174259eSArnaldo Carvalho de Melo tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, 7167174259eSArnaldo Carvalho de Melo tcptw->tw_ts_recent); 7171da177e4SLinus Torvalds 7188feaf0c0SArnaldo Carvalho de Melo inet_twsk_put(tw); 7191da177e4SLinus Torvalds } 7201da177e4SLinus Torvalds 7217174259eSArnaldo Carvalho de Melo static void tcp_v4_reqsk_send_ack(struct sk_buff *skb, 7227174259eSArnaldo Carvalho de Melo struct request_sock *req) 7231da177e4SLinus Torvalds { 724cfb6eeb4SYOSHIFUJI Hideaki tcp_v4_send_ack(NULL, skb, tcp_rsk(req)->snt_isn + 1, 725cfb6eeb4SYOSHIFUJI Hideaki tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, 7261da177e4SLinus Torvalds req->ts_recent); 7271da177e4SLinus Torvalds } 7281da177e4SLinus Torvalds 7291da177e4SLinus Torvalds /* 7301da177e4SLinus Torvalds * Send a SYN-ACK after having received an ACK. 73160236fddSArnaldo Carvalho de Melo * This still operates on a request_sock only, not on a big 7321da177e4SLinus Torvalds * socket. 7331da177e4SLinus Torvalds */ 73460236fddSArnaldo Carvalho de Melo static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req, 7351da177e4SLinus Torvalds struct dst_entry *dst) 7361da177e4SLinus Torvalds { 7372e6599cbSArnaldo Carvalho de Melo const struct inet_request_sock *ireq = inet_rsk(req); 7381da177e4SLinus Torvalds int err = -1; 7391da177e4SLinus Torvalds struct sk_buff * skb; 7401da177e4SLinus Torvalds 7411da177e4SLinus Torvalds /* First, grab a route. */ 742463c84b9SArnaldo Carvalho de Melo if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL) 7431da177e4SLinus Torvalds goto out; 7441da177e4SLinus Torvalds 7451da177e4SLinus Torvalds skb = tcp_make_synack(sk, dst, req); 7461da177e4SLinus Torvalds 7471da177e4SLinus Torvalds if (skb) { 7481da177e4SLinus Torvalds struct tcphdr *th = skb->h.th; 7491da177e4SLinus Torvalds 750ba7808eaSFrederik Deweerdt th->check = tcp_v4_check(skb->len, 7512e6599cbSArnaldo Carvalho de Melo ireq->loc_addr, 7522e6599cbSArnaldo Carvalho de Melo ireq->rmt_addr, 7531da177e4SLinus Torvalds csum_partial((char *)th, skb->len, 7541da177e4SLinus Torvalds skb->csum)); 7551da177e4SLinus Torvalds 7562e6599cbSArnaldo Carvalho de Melo err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr, 7572e6599cbSArnaldo Carvalho de Melo ireq->rmt_addr, 7582e6599cbSArnaldo Carvalho de Melo ireq->opt); 759b9df3cb8SGerrit Renker err = net_xmit_eval(err); 7601da177e4SLinus Torvalds } 7611da177e4SLinus Torvalds 7621da177e4SLinus Torvalds out: 7631da177e4SLinus Torvalds dst_release(dst); 7641da177e4SLinus Torvalds return err; 7651da177e4SLinus Torvalds } 7661da177e4SLinus Torvalds 7671da177e4SLinus Torvalds /* 76860236fddSArnaldo Carvalho de Melo * IPv4 request_sock destructor. 7691da177e4SLinus Torvalds */ 77060236fddSArnaldo Carvalho de Melo static void tcp_v4_reqsk_destructor(struct request_sock *req) 7711da177e4SLinus Torvalds { 7722e6599cbSArnaldo Carvalho de Melo kfree(inet_rsk(req)->opt); 7731da177e4SLinus Torvalds } 7741da177e4SLinus Torvalds 77580e40daaSArnaldo Carvalho de Melo #ifdef CONFIG_SYN_COOKIES 77640efc6faSStephen Hemminger static void syn_flood_warning(struct sk_buff *skb) 7771da177e4SLinus Torvalds { 7781da177e4SLinus Torvalds static unsigned long warntime; 7791da177e4SLinus Torvalds 7801da177e4SLinus Torvalds if (time_after(jiffies, (warntime + HZ * 60))) { 7811da177e4SLinus Torvalds warntime = jiffies; 7821da177e4SLinus Torvalds printk(KERN_INFO 7831da177e4SLinus Torvalds "possible SYN flooding on port %d. Sending cookies.\n", 7841da177e4SLinus Torvalds ntohs(skb->h.th->dest)); 7851da177e4SLinus Torvalds } 7861da177e4SLinus Torvalds } 78780e40daaSArnaldo Carvalho de Melo #endif 7881da177e4SLinus Torvalds 7891da177e4SLinus Torvalds /* 79060236fddSArnaldo Carvalho de Melo * Save and compile IPv4 options into the request_sock if needed. 7911da177e4SLinus Torvalds */ 79240efc6faSStephen Hemminger static struct ip_options *tcp_v4_save_options(struct sock *sk, 7931da177e4SLinus Torvalds struct sk_buff *skb) 7941da177e4SLinus Torvalds { 7951da177e4SLinus Torvalds struct ip_options *opt = &(IPCB(skb)->opt); 7961da177e4SLinus Torvalds struct ip_options *dopt = NULL; 7971da177e4SLinus Torvalds 7981da177e4SLinus Torvalds if (opt && opt->optlen) { 7991da177e4SLinus Torvalds int opt_size = optlength(opt); 8001da177e4SLinus Torvalds dopt = kmalloc(opt_size, GFP_ATOMIC); 8011da177e4SLinus Torvalds if (dopt) { 8021da177e4SLinus Torvalds if (ip_options_echo(dopt, skb)) { 8031da177e4SLinus Torvalds kfree(dopt); 8041da177e4SLinus Torvalds dopt = NULL; 8051da177e4SLinus Torvalds } 8061da177e4SLinus Torvalds } 8071da177e4SLinus Torvalds } 8081da177e4SLinus Torvalds return dopt; 8091da177e4SLinus Torvalds } 8101da177e4SLinus Torvalds 811cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 812cfb6eeb4SYOSHIFUJI Hideaki /* 813cfb6eeb4SYOSHIFUJI Hideaki * RFC2385 MD5 checksumming requires a mapping of 814cfb6eeb4SYOSHIFUJI Hideaki * IP address->MD5 Key. 815cfb6eeb4SYOSHIFUJI Hideaki * We need to maintain these in the sk structure. 816cfb6eeb4SYOSHIFUJI Hideaki */ 817cfb6eeb4SYOSHIFUJI Hideaki 818cfb6eeb4SYOSHIFUJI Hideaki /* Find the Key structure for an address. */ 8197174259eSArnaldo Carvalho de Melo static struct tcp_md5sig_key * 8207174259eSArnaldo Carvalho de Melo tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr) 821cfb6eeb4SYOSHIFUJI Hideaki { 822cfb6eeb4SYOSHIFUJI Hideaki struct tcp_sock *tp = tcp_sk(sk); 823cfb6eeb4SYOSHIFUJI Hideaki int i; 824cfb6eeb4SYOSHIFUJI Hideaki 825cfb6eeb4SYOSHIFUJI Hideaki if (!tp->md5sig_info || !tp->md5sig_info->entries4) 826cfb6eeb4SYOSHIFUJI Hideaki return NULL; 827cfb6eeb4SYOSHIFUJI Hideaki for (i = 0; i < tp->md5sig_info->entries4; i++) { 828cfb6eeb4SYOSHIFUJI Hideaki if (tp->md5sig_info->keys4[i].addr == addr) 8297174259eSArnaldo Carvalho de Melo return (struct tcp_md5sig_key *) 8307174259eSArnaldo Carvalho de Melo &tp->md5sig_info->keys4[i]; 831cfb6eeb4SYOSHIFUJI Hideaki } 832cfb6eeb4SYOSHIFUJI Hideaki return NULL; 833cfb6eeb4SYOSHIFUJI Hideaki } 834cfb6eeb4SYOSHIFUJI Hideaki 835cfb6eeb4SYOSHIFUJI Hideaki struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk, 836cfb6eeb4SYOSHIFUJI Hideaki struct sock *addr_sk) 837cfb6eeb4SYOSHIFUJI Hideaki { 838cfb6eeb4SYOSHIFUJI Hideaki return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->daddr); 839cfb6eeb4SYOSHIFUJI Hideaki } 840cfb6eeb4SYOSHIFUJI Hideaki 841cfb6eeb4SYOSHIFUJI Hideaki EXPORT_SYMBOL(tcp_v4_md5_lookup); 842cfb6eeb4SYOSHIFUJI Hideaki 843f5b99bcdSAdrian Bunk static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk, 844cfb6eeb4SYOSHIFUJI Hideaki struct request_sock *req) 845cfb6eeb4SYOSHIFUJI Hideaki { 846cfb6eeb4SYOSHIFUJI Hideaki return tcp_v4_md5_do_lookup(sk, inet_rsk(req)->rmt_addr); 847cfb6eeb4SYOSHIFUJI Hideaki } 848cfb6eeb4SYOSHIFUJI Hideaki 849cfb6eeb4SYOSHIFUJI Hideaki /* This can be called on a newly created socket, from other files */ 850cfb6eeb4SYOSHIFUJI Hideaki int tcp_v4_md5_do_add(struct sock *sk, __be32 addr, 851cfb6eeb4SYOSHIFUJI Hideaki u8 *newkey, u8 newkeylen) 852cfb6eeb4SYOSHIFUJI Hideaki { 853cfb6eeb4SYOSHIFUJI Hideaki /* Add Key to the list */ 854cfb6eeb4SYOSHIFUJI Hideaki struct tcp4_md5sig_key *key; 855cfb6eeb4SYOSHIFUJI Hideaki struct tcp_sock *tp = tcp_sk(sk); 856cfb6eeb4SYOSHIFUJI Hideaki struct tcp4_md5sig_key *keys; 857cfb6eeb4SYOSHIFUJI Hideaki 858cfb6eeb4SYOSHIFUJI Hideaki key = (struct tcp4_md5sig_key *)tcp_v4_md5_do_lookup(sk, addr); 859cfb6eeb4SYOSHIFUJI Hideaki if (key) { 860cfb6eeb4SYOSHIFUJI Hideaki /* Pre-existing entry - just update that one. */ 861cfb6eeb4SYOSHIFUJI Hideaki kfree(key->key); 862cfb6eeb4SYOSHIFUJI Hideaki key->key = newkey; 863cfb6eeb4SYOSHIFUJI Hideaki key->keylen = newkeylen; 864cfb6eeb4SYOSHIFUJI Hideaki } else { 865f6685938SArnaldo Carvalho de Melo struct tcp_md5sig_info *md5sig; 866f6685938SArnaldo Carvalho de Melo 867cfb6eeb4SYOSHIFUJI Hideaki if (!tp->md5sig_info) { 868f6685938SArnaldo Carvalho de Melo tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info), 869f6685938SArnaldo Carvalho de Melo GFP_ATOMIC); 870cfb6eeb4SYOSHIFUJI Hideaki if (!tp->md5sig_info) { 871cfb6eeb4SYOSHIFUJI Hideaki kfree(newkey); 872cfb6eeb4SYOSHIFUJI Hideaki return -ENOMEM; 873cfb6eeb4SYOSHIFUJI Hideaki } 874cfb6eeb4SYOSHIFUJI Hideaki } 875cfb6eeb4SYOSHIFUJI Hideaki if (tcp_alloc_md5sig_pool() == NULL) { 876cfb6eeb4SYOSHIFUJI Hideaki kfree(newkey); 877cfb6eeb4SYOSHIFUJI Hideaki return -ENOMEM; 878cfb6eeb4SYOSHIFUJI Hideaki } 879f6685938SArnaldo Carvalho de Melo md5sig = tp->md5sig_info; 880f6685938SArnaldo Carvalho de Melo 881f6685938SArnaldo Carvalho de Melo if (md5sig->alloced4 == md5sig->entries4) { 882f6685938SArnaldo Carvalho de Melo keys = kmalloc((sizeof(*keys) * 883f6685938SArnaldo Carvalho de Melo (md5sig->entries4 + 1)), GFP_ATOMIC); 884cfb6eeb4SYOSHIFUJI Hideaki if (!keys) { 885cfb6eeb4SYOSHIFUJI Hideaki kfree(newkey); 886cfb6eeb4SYOSHIFUJI Hideaki tcp_free_md5sig_pool(); 887cfb6eeb4SYOSHIFUJI Hideaki return -ENOMEM; 888cfb6eeb4SYOSHIFUJI Hideaki } 889cfb6eeb4SYOSHIFUJI Hideaki 890f6685938SArnaldo Carvalho de Melo if (md5sig->entries4) 891f6685938SArnaldo Carvalho de Melo memcpy(keys, md5sig->keys4, 892f6685938SArnaldo Carvalho de Melo sizeof(*keys) * md5sig->entries4); 893cfb6eeb4SYOSHIFUJI Hideaki 894cfb6eeb4SYOSHIFUJI Hideaki /* Free old key list, and reference new one */ 895f6685938SArnaldo Carvalho de Melo if (md5sig->keys4) 896f6685938SArnaldo Carvalho de Melo kfree(md5sig->keys4); 897f6685938SArnaldo Carvalho de Melo md5sig->keys4 = keys; 898f6685938SArnaldo Carvalho de Melo md5sig->alloced4++; 899cfb6eeb4SYOSHIFUJI Hideaki } 900f6685938SArnaldo Carvalho de Melo md5sig->entries4++; 901f6685938SArnaldo Carvalho de Melo md5sig->keys4[md5sig->entries4 - 1].addr = addr; 902f6685938SArnaldo Carvalho de Melo md5sig->keys4[md5sig->entries4 - 1].key = newkey; 903f6685938SArnaldo Carvalho de Melo md5sig->keys4[md5sig->entries4 - 1].keylen = newkeylen; 904cfb6eeb4SYOSHIFUJI Hideaki } 905cfb6eeb4SYOSHIFUJI Hideaki return 0; 906cfb6eeb4SYOSHIFUJI Hideaki } 907cfb6eeb4SYOSHIFUJI Hideaki 908cfb6eeb4SYOSHIFUJI Hideaki EXPORT_SYMBOL(tcp_v4_md5_do_add); 909cfb6eeb4SYOSHIFUJI Hideaki 910cfb6eeb4SYOSHIFUJI Hideaki static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk, 911cfb6eeb4SYOSHIFUJI Hideaki u8 *newkey, u8 newkeylen) 912cfb6eeb4SYOSHIFUJI Hideaki { 913cfb6eeb4SYOSHIFUJI Hideaki return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->daddr, 914cfb6eeb4SYOSHIFUJI Hideaki newkey, newkeylen); 915cfb6eeb4SYOSHIFUJI Hideaki } 916cfb6eeb4SYOSHIFUJI Hideaki 917cfb6eeb4SYOSHIFUJI Hideaki int tcp_v4_md5_do_del(struct sock *sk, __be32 addr) 918cfb6eeb4SYOSHIFUJI Hideaki { 919cfb6eeb4SYOSHIFUJI Hideaki struct tcp_sock *tp = tcp_sk(sk); 920cfb6eeb4SYOSHIFUJI Hideaki int i; 921cfb6eeb4SYOSHIFUJI Hideaki 922cfb6eeb4SYOSHIFUJI Hideaki for (i = 0; i < tp->md5sig_info->entries4; i++) { 923cfb6eeb4SYOSHIFUJI Hideaki if (tp->md5sig_info->keys4[i].addr == addr) { 924cfb6eeb4SYOSHIFUJI Hideaki /* Free the key */ 925cfb6eeb4SYOSHIFUJI Hideaki kfree(tp->md5sig_info->keys4[i].key); 926cfb6eeb4SYOSHIFUJI Hideaki tp->md5sig_info->entries4--; 927cfb6eeb4SYOSHIFUJI Hideaki 928cfb6eeb4SYOSHIFUJI Hideaki if (tp->md5sig_info->entries4 == 0) { 929cfb6eeb4SYOSHIFUJI Hideaki kfree(tp->md5sig_info->keys4); 930cfb6eeb4SYOSHIFUJI Hideaki tp->md5sig_info->keys4 = NULL; 9318228a18dSLeigh Brown tp->md5sig_info->alloced4 = 0; 9327174259eSArnaldo Carvalho de Melo } else if (tp->md5sig_info->entries4 != i) { 933cfb6eeb4SYOSHIFUJI Hideaki /* Need to do some manipulation */ 934cfb6eeb4SYOSHIFUJI Hideaki memcpy(&tp->md5sig_info->keys4[i], 935cfb6eeb4SYOSHIFUJI Hideaki &tp->md5sig_info->keys4[i+1], 9367174259eSArnaldo Carvalho de Melo (tp->md5sig_info->entries4 - i) * 9377174259eSArnaldo Carvalho de Melo sizeof(struct tcp4_md5sig_key)); 938cfb6eeb4SYOSHIFUJI Hideaki } 939cfb6eeb4SYOSHIFUJI Hideaki tcp_free_md5sig_pool(); 940cfb6eeb4SYOSHIFUJI Hideaki return 0; 941cfb6eeb4SYOSHIFUJI Hideaki } 942cfb6eeb4SYOSHIFUJI Hideaki } 943cfb6eeb4SYOSHIFUJI Hideaki return -ENOENT; 944cfb6eeb4SYOSHIFUJI Hideaki } 945cfb6eeb4SYOSHIFUJI Hideaki 946cfb6eeb4SYOSHIFUJI Hideaki EXPORT_SYMBOL(tcp_v4_md5_do_del); 947cfb6eeb4SYOSHIFUJI Hideaki 948cfb6eeb4SYOSHIFUJI Hideaki static void tcp_v4_clear_md5_list(struct sock *sk) 949cfb6eeb4SYOSHIFUJI Hideaki { 950cfb6eeb4SYOSHIFUJI Hideaki struct tcp_sock *tp = tcp_sk(sk); 951cfb6eeb4SYOSHIFUJI Hideaki 952cfb6eeb4SYOSHIFUJI Hideaki /* Free each key, then the set of key keys, 953cfb6eeb4SYOSHIFUJI Hideaki * the crypto element, and then decrement our 954cfb6eeb4SYOSHIFUJI Hideaki * hold on the last resort crypto. 955cfb6eeb4SYOSHIFUJI Hideaki */ 956cfb6eeb4SYOSHIFUJI Hideaki if (tp->md5sig_info->entries4) { 957cfb6eeb4SYOSHIFUJI Hideaki int i; 958cfb6eeb4SYOSHIFUJI Hideaki for (i = 0; i < tp->md5sig_info->entries4; i++) 959cfb6eeb4SYOSHIFUJI Hideaki kfree(tp->md5sig_info->keys4[i].key); 960cfb6eeb4SYOSHIFUJI Hideaki tp->md5sig_info->entries4 = 0; 961cfb6eeb4SYOSHIFUJI Hideaki tcp_free_md5sig_pool(); 962cfb6eeb4SYOSHIFUJI Hideaki } 963cfb6eeb4SYOSHIFUJI Hideaki if (tp->md5sig_info->keys4) { 964cfb6eeb4SYOSHIFUJI Hideaki kfree(tp->md5sig_info->keys4); 965cfb6eeb4SYOSHIFUJI Hideaki tp->md5sig_info->keys4 = NULL; 966cfb6eeb4SYOSHIFUJI Hideaki tp->md5sig_info->alloced4 = 0; 967cfb6eeb4SYOSHIFUJI Hideaki } 968cfb6eeb4SYOSHIFUJI Hideaki } 969cfb6eeb4SYOSHIFUJI Hideaki 970cfb6eeb4SYOSHIFUJI Hideaki static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval, 971cfb6eeb4SYOSHIFUJI Hideaki int optlen) 972cfb6eeb4SYOSHIFUJI Hideaki { 973cfb6eeb4SYOSHIFUJI Hideaki struct tcp_md5sig cmd; 974cfb6eeb4SYOSHIFUJI Hideaki struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr; 975cfb6eeb4SYOSHIFUJI Hideaki u8 *newkey; 976cfb6eeb4SYOSHIFUJI Hideaki 977cfb6eeb4SYOSHIFUJI Hideaki if (optlen < sizeof(cmd)) 978cfb6eeb4SYOSHIFUJI Hideaki return -EINVAL; 979cfb6eeb4SYOSHIFUJI Hideaki 980cfb6eeb4SYOSHIFUJI Hideaki if (copy_from_user(&cmd, optval, sizeof(cmd))) 981cfb6eeb4SYOSHIFUJI Hideaki return -EFAULT; 982cfb6eeb4SYOSHIFUJI Hideaki 983cfb6eeb4SYOSHIFUJI Hideaki if (sin->sin_family != AF_INET) 984cfb6eeb4SYOSHIFUJI Hideaki return -EINVAL; 985cfb6eeb4SYOSHIFUJI Hideaki 986cfb6eeb4SYOSHIFUJI Hideaki if (!cmd.tcpm_key || !cmd.tcpm_keylen) { 987cfb6eeb4SYOSHIFUJI Hideaki if (!tcp_sk(sk)->md5sig_info) 988cfb6eeb4SYOSHIFUJI Hideaki return -ENOENT; 989cfb6eeb4SYOSHIFUJI Hideaki return tcp_v4_md5_do_del(sk, sin->sin_addr.s_addr); 990cfb6eeb4SYOSHIFUJI Hideaki } 991cfb6eeb4SYOSHIFUJI Hideaki 992cfb6eeb4SYOSHIFUJI Hideaki if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN) 993cfb6eeb4SYOSHIFUJI Hideaki return -EINVAL; 994cfb6eeb4SYOSHIFUJI Hideaki 995cfb6eeb4SYOSHIFUJI Hideaki if (!tcp_sk(sk)->md5sig_info) { 996cfb6eeb4SYOSHIFUJI Hideaki struct tcp_sock *tp = tcp_sk(sk); 9977174259eSArnaldo Carvalho de Melo struct tcp_md5sig_info *p = kzalloc(sizeof(*p), GFP_KERNEL); 998cfb6eeb4SYOSHIFUJI Hideaki 999cfb6eeb4SYOSHIFUJI Hideaki if (!p) 1000cfb6eeb4SYOSHIFUJI Hideaki return -EINVAL; 1001cfb6eeb4SYOSHIFUJI Hideaki 1002cfb6eeb4SYOSHIFUJI Hideaki tp->md5sig_info = p; 1003cfb6eeb4SYOSHIFUJI Hideaki 1004cfb6eeb4SYOSHIFUJI Hideaki } 1005cfb6eeb4SYOSHIFUJI Hideaki 1006f6685938SArnaldo Carvalho de Melo newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL); 1007cfb6eeb4SYOSHIFUJI Hideaki if (!newkey) 1008cfb6eeb4SYOSHIFUJI Hideaki return -ENOMEM; 1009cfb6eeb4SYOSHIFUJI Hideaki return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr, 1010cfb6eeb4SYOSHIFUJI Hideaki newkey, cmd.tcpm_keylen); 1011cfb6eeb4SYOSHIFUJI Hideaki } 1012cfb6eeb4SYOSHIFUJI Hideaki 1013cfb6eeb4SYOSHIFUJI Hideaki static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key, 1014cfb6eeb4SYOSHIFUJI Hideaki __be32 saddr, __be32 daddr, 1015cfb6eeb4SYOSHIFUJI Hideaki struct tcphdr *th, int protocol, 1016cfb6eeb4SYOSHIFUJI Hideaki int tcplen) 1017cfb6eeb4SYOSHIFUJI Hideaki { 1018cfb6eeb4SYOSHIFUJI Hideaki struct scatterlist sg[4]; 1019cfb6eeb4SYOSHIFUJI Hideaki __u16 data_len; 1020cfb6eeb4SYOSHIFUJI Hideaki int block = 0; 10218e5200f5SAl Viro __sum16 old_checksum; 1022cfb6eeb4SYOSHIFUJI Hideaki struct tcp_md5sig_pool *hp; 1023cfb6eeb4SYOSHIFUJI Hideaki struct tcp4_pseudohdr *bp; 1024cfb6eeb4SYOSHIFUJI Hideaki struct hash_desc *desc; 1025cfb6eeb4SYOSHIFUJI Hideaki int err; 1026cfb6eeb4SYOSHIFUJI Hideaki unsigned int nbytes = 0; 1027cfb6eeb4SYOSHIFUJI Hideaki 1028cfb6eeb4SYOSHIFUJI Hideaki /* 1029cfb6eeb4SYOSHIFUJI Hideaki * Okay, so RFC2385 is turned on for this connection, 1030cfb6eeb4SYOSHIFUJI Hideaki * so we need to generate the MD5 hash for the packet now. 1031cfb6eeb4SYOSHIFUJI Hideaki */ 1032cfb6eeb4SYOSHIFUJI Hideaki 1033cfb6eeb4SYOSHIFUJI Hideaki hp = tcp_get_md5sig_pool(); 1034cfb6eeb4SYOSHIFUJI Hideaki if (!hp) 1035cfb6eeb4SYOSHIFUJI Hideaki goto clear_hash_noput; 1036cfb6eeb4SYOSHIFUJI Hideaki 1037cfb6eeb4SYOSHIFUJI Hideaki bp = &hp->md5_blk.ip4; 1038cfb6eeb4SYOSHIFUJI Hideaki desc = &hp->md5_desc; 1039cfb6eeb4SYOSHIFUJI Hideaki 1040cfb6eeb4SYOSHIFUJI Hideaki /* 1041cfb6eeb4SYOSHIFUJI Hideaki * 1. the TCP pseudo-header (in the order: source IP address, 1042cfb6eeb4SYOSHIFUJI Hideaki * destination IP address, zero-padded protocol number, and 1043cfb6eeb4SYOSHIFUJI Hideaki * segment length) 1044cfb6eeb4SYOSHIFUJI Hideaki */ 1045cfb6eeb4SYOSHIFUJI Hideaki bp->saddr = saddr; 1046cfb6eeb4SYOSHIFUJI Hideaki bp->daddr = daddr; 1047cfb6eeb4SYOSHIFUJI Hideaki bp->pad = 0; 1048cfb6eeb4SYOSHIFUJI Hideaki bp->protocol = protocol; 1049cfb6eeb4SYOSHIFUJI Hideaki bp->len = htons(tcplen); 1050cfb6eeb4SYOSHIFUJI Hideaki sg_set_buf(&sg[block++], bp, sizeof(*bp)); 1051cfb6eeb4SYOSHIFUJI Hideaki nbytes += sizeof(*bp); 1052cfb6eeb4SYOSHIFUJI Hideaki 1053cfb6eeb4SYOSHIFUJI Hideaki /* 2. the TCP header, excluding options, and assuming a 1054cfb6eeb4SYOSHIFUJI Hideaki * checksum of zero/ 1055cfb6eeb4SYOSHIFUJI Hideaki */ 1056cfb6eeb4SYOSHIFUJI Hideaki old_checksum = th->check; 1057cfb6eeb4SYOSHIFUJI Hideaki th->check = 0; 1058cfb6eeb4SYOSHIFUJI Hideaki sg_set_buf(&sg[block++], th, sizeof(struct tcphdr)); 1059cfb6eeb4SYOSHIFUJI Hideaki nbytes += sizeof(struct tcphdr); 106008dd1a50SDavid S. Miller 1061cfb6eeb4SYOSHIFUJI Hideaki /* 3. the TCP segment data (if any) */ 1062cfb6eeb4SYOSHIFUJI Hideaki data_len = tcplen - (th->doff << 2); 1063cfb6eeb4SYOSHIFUJI Hideaki if (data_len > 0) { 1064cfb6eeb4SYOSHIFUJI Hideaki unsigned char *data = (unsigned char *)th + (th->doff << 2); 1065cfb6eeb4SYOSHIFUJI Hideaki sg_set_buf(&sg[block++], data, data_len); 1066cfb6eeb4SYOSHIFUJI Hideaki nbytes += data_len; 1067cfb6eeb4SYOSHIFUJI Hideaki } 1068cfb6eeb4SYOSHIFUJI Hideaki 1069cfb6eeb4SYOSHIFUJI Hideaki /* 4. an independently-specified key or password, known to both 1070cfb6eeb4SYOSHIFUJI Hideaki * TCPs and presumably connection-specific 1071cfb6eeb4SYOSHIFUJI Hideaki */ 1072cfb6eeb4SYOSHIFUJI Hideaki sg_set_buf(&sg[block++], key->key, key->keylen); 1073cfb6eeb4SYOSHIFUJI Hideaki nbytes += key->keylen; 1074cfb6eeb4SYOSHIFUJI Hideaki 1075cfb6eeb4SYOSHIFUJI Hideaki /* Now store the Hash into the packet */ 1076cfb6eeb4SYOSHIFUJI Hideaki err = crypto_hash_init(desc); 1077cfb6eeb4SYOSHIFUJI Hideaki if (err) 1078cfb6eeb4SYOSHIFUJI Hideaki goto clear_hash; 1079cfb6eeb4SYOSHIFUJI Hideaki err = crypto_hash_update(desc, sg, nbytes); 1080cfb6eeb4SYOSHIFUJI Hideaki if (err) 1081cfb6eeb4SYOSHIFUJI Hideaki goto clear_hash; 1082cfb6eeb4SYOSHIFUJI Hideaki err = crypto_hash_final(desc, md5_hash); 1083cfb6eeb4SYOSHIFUJI Hideaki if (err) 1084cfb6eeb4SYOSHIFUJI Hideaki goto clear_hash; 1085cfb6eeb4SYOSHIFUJI Hideaki 1086cfb6eeb4SYOSHIFUJI Hideaki /* Reset header, and free up the crypto */ 1087cfb6eeb4SYOSHIFUJI Hideaki tcp_put_md5sig_pool(); 1088cfb6eeb4SYOSHIFUJI Hideaki th->check = old_checksum; 1089cfb6eeb4SYOSHIFUJI Hideaki 1090cfb6eeb4SYOSHIFUJI Hideaki out: 1091cfb6eeb4SYOSHIFUJI Hideaki return 0; 1092cfb6eeb4SYOSHIFUJI Hideaki clear_hash: 1093cfb6eeb4SYOSHIFUJI Hideaki tcp_put_md5sig_pool(); 1094cfb6eeb4SYOSHIFUJI Hideaki clear_hash_noput: 1095cfb6eeb4SYOSHIFUJI Hideaki memset(md5_hash, 0, 16); 1096cfb6eeb4SYOSHIFUJI Hideaki goto out; 1097cfb6eeb4SYOSHIFUJI Hideaki } 1098cfb6eeb4SYOSHIFUJI Hideaki 1099cfb6eeb4SYOSHIFUJI Hideaki int tcp_v4_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key, 1100cfb6eeb4SYOSHIFUJI Hideaki struct sock *sk, 1101cfb6eeb4SYOSHIFUJI Hideaki struct dst_entry *dst, 1102cfb6eeb4SYOSHIFUJI Hideaki struct request_sock *req, 1103cfb6eeb4SYOSHIFUJI Hideaki struct tcphdr *th, int protocol, 1104cfb6eeb4SYOSHIFUJI Hideaki int tcplen) 1105cfb6eeb4SYOSHIFUJI Hideaki { 1106cfb6eeb4SYOSHIFUJI Hideaki __be32 saddr, daddr; 1107cfb6eeb4SYOSHIFUJI Hideaki 1108cfb6eeb4SYOSHIFUJI Hideaki if (sk) { 1109cfb6eeb4SYOSHIFUJI Hideaki saddr = inet_sk(sk)->saddr; 1110cfb6eeb4SYOSHIFUJI Hideaki daddr = inet_sk(sk)->daddr; 1111cfb6eeb4SYOSHIFUJI Hideaki } else { 1112cfb6eeb4SYOSHIFUJI Hideaki struct rtable *rt = (struct rtable *)dst; 1113cfb6eeb4SYOSHIFUJI Hideaki BUG_ON(!rt); 1114cfb6eeb4SYOSHIFUJI Hideaki saddr = rt->rt_src; 1115cfb6eeb4SYOSHIFUJI Hideaki daddr = rt->rt_dst; 1116cfb6eeb4SYOSHIFUJI Hideaki } 1117cfb6eeb4SYOSHIFUJI Hideaki return tcp_v4_do_calc_md5_hash(md5_hash, key, 1118cfb6eeb4SYOSHIFUJI Hideaki saddr, daddr, 1119cfb6eeb4SYOSHIFUJI Hideaki th, protocol, tcplen); 1120cfb6eeb4SYOSHIFUJI Hideaki } 1121cfb6eeb4SYOSHIFUJI Hideaki 1122cfb6eeb4SYOSHIFUJI Hideaki EXPORT_SYMBOL(tcp_v4_calc_md5_hash); 1123cfb6eeb4SYOSHIFUJI Hideaki 1124cfb6eeb4SYOSHIFUJI Hideaki static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb) 1125cfb6eeb4SYOSHIFUJI Hideaki { 1126cfb6eeb4SYOSHIFUJI Hideaki /* 1127cfb6eeb4SYOSHIFUJI Hideaki * This gets called for each TCP segment that arrives 1128cfb6eeb4SYOSHIFUJI Hideaki * so we want to be efficient. 1129cfb6eeb4SYOSHIFUJI Hideaki * We have 3 drop cases: 1130cfb6eeb4SYOSHIFUJI Hideaki * o No MD5 hash and one expected. 1131cfb6eeb4SYOSHIFUJI Hideaki * o MD5 hash and we're not expecting one. 1132cfb6eeb4SYOSHIFUJI Hideaki * o MD5 hash and its wrong. 1133cfb6eeb4SYOSHIFUJI Hideaki */ 1134cfb6eeb4SYOSHIFUJI Hideaki __u8 *hash_location = NULL; 1135cfb6eeb4SYOSHIFUJI Hideaki struct tcp_md5sig_key *hash_expected; 1136eddc9ec5SArnaldo Carvalho de Melo const struct iphdr *iph = ip_hdr(skb); 1137cfb6eeb4SYOSHIFUJI Hideaki struct tcphdr *th = skb->h.th; 1138cfb6eeb4SYOSHIFUJI Hideaki int length = (th->doff << 2) - sizeof(struct tcphdr); 1139cfb6eeb4SYOSHIFUJI Hideaki int genhash; 1140cfb6eeb4SYOSHIFUJI Hideaki unsigned char *ptr; 1141cfb6eeb4SYOSHIFUJI Hideaki unsigned char newhash[16]; 1142cfb6eeb4SYOSHIFUJI Hideaki 1143cfb6eeb4SYOSHIFUJI Hideaki hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr); 1144cfb6eeb4SYOSHIFUJI Hideaki 1145cfb6eeb4SYOSHIFUJI Hideaki /* 1146cfb6eeb4SYOSHIFUJI Hideaki * If the TCP option length is less than the TCP_MD5SIG 1147cfb6eeb4SYOSHIFUJI Hideaki * option length, then we can shortcut 1148cfb6eeb4SYOSHIFUJI Hideaki */ 1149cfb6eeb4SYOSHIFUJI Hideaki if (length < TCPOLEN_MD5SIG) { 1150cfb6eeb4SYOSHIFUJI Hideaki if (hash_expected) 1151cfb6eeb4SYOSHIFUJI Hideaki return 1; 1152cfb6eeb4SYOSHIFUJI Hideaki else 1153cfb6eeb4SYOSHIFUJI Hideaki return 0; 1154cfb6eeb4SYOSHIFUJI Hideaki } 1155cfb6eeb4SYOSHIFUJI Hideaki 1156cfb6eeb4SYOSHIFUJI Hideaki /* Okay, we can't shortcut - we have to grub through the options */ 1157cfb6eeb4SYOSHIFUJI Hideaki ptr = (unsigned char *)(th + 1); 1158cfb6eeb4SYOSHIFUJI Hideaki while (length > 0) { 1159cfb6eeb4SYOSHIFUJI Hideaki int opcode = *ptr++; 1160cfb6eeb4SYOSHIFUJI Hideaki int opsize; 1161cfb6eeb4SYOSHIFUJI Hideaki 1162cfb6eeb4SYOSHIFUJI Hideaki switch (opcode) { 1163cfb6eeb4SYOSHIFUJI Hideaki case TCPOPT_EOL: 1164cfb6eeb4SYOSHIFUJI Hideaki goto done_opts; 1165cfb6eeb4SYOSHIFUJI Hideaki case TCPOPT_NOP: 1166cfb6eeb4SYOSHIFUJI Hideaki length--; 1167cfb6eeb4SYOSHIFUJI Hideaki continue; 1168cfb6eeb4SYOSHIFUJI Hideaki default: 1169cfb6eeb4SYOSHIFUJI Hideaki opsize = *ptr++; 1170cfb6eeb4SYOSHIFUJI Hideaki if (opsize < 2) 1171cfb6eeb4SYOSHIFUJI Hideaki goto done_opts; 1172cfb6eeb4SYOSHIFUJI Hideaki if (opsize > length) 1173cfb6eeb4SYOSHIFUJI Hideaki goto done_opts; 1174cfb6eeb4SYOSHIFUJI Hideaki 1175cfb6eeb4SYOSHIFUJI Hideaki if (opcode == TCPOPT_MD5SIG) { 1176cfb6eeb4SYOSHIFUJI Hideaki hash_location = ptr; 1177cfb6eeb4SYOSHIFUJI Hideaki goto done_opts; 1178cfb6eeb4SYOSHIFUJI Hideaki } 1179cfb6eeb4SYOSHIFUJI Hideaki } 1180cfb6eeb4SYOSHIFUJI Hideaki ptr += opsize-2; 1181cfb6eeb4SYOSHIFUJI Hideaki length -= opsize; 1182cfb6eeb4SYOSHIFUJI Hideaki } 1183cfb6eeb4SYOSHIFUJI Hideaki done_opts: 1184cfb6eeb4SYOSHIFUJI Hideaki /* We've parsed the options - do we have a hash? */ 1185cfb6eeb4SYOSHIFUJI Hideaki if (!hash_expected && !hash_location) 1186cfb6eeb4SYOSHIFUJI Hideaki return 0; 1187cfb6eeb4SYOSHIFUJI Hideaki 1188cfb6eeb4SYOSHIFUJI Hideaki if (hash_expected && !hash_location) { 1189a9fc00ccSLeigh Brown LIMIT_NETDEBUG(KERN_INFO "MD5 Hash expected but NOT found " 1190cfb6eeb4SYOSHIFUJI Hideaki "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)\n", 1191cfb6eeb4SYOSHIFUJI Hideaki NIPQUAD(iph->saddr), ntohs(th->source), 1192cfb6eeb4SYOSHIFUJI Hideaki NIPQUAD(iph->daddr), ntohs(th->dest)); 1193cfb6eeb4SYOSHIFUJI Hideaki return 1; 1194cfb6eeb4SYOSHIFUJI Hideaki } 1195cfb6eeb4SYOSHIFUJI Hideaki 1196cfb6eeb4SYOSHIFUJI Hideaki if (!hash_expected && hash_location) { 11977174259eSArnaldo Carvalho de Melo LIMIT_NETDEBUG(KERN_INFO "MD5 Hash NOT expected but found " 1198cfb6eeb4SYOSHIFUJI Hideaki "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)\n", 1199cfb6eeb4SYOSHIFUJI Hideaki NIPQUAD(iph->saddr), ntohs(th->source), 1200cfb6eeb4SYOSHIFUJI Hideaki NIPQUAD(iph->daddr), ntohs(th->dest)); 1201cfb6eeb4SYOSHIFUJI Hideaki return 1; 1202cfb6eeb4SYOSHIFUJI Hideaki } 1203cfb6eeb4SYOSHIFUJI Hideaki 1204cfb6eeb4SYOSHIFUJI Hideaki /* Okay, so this is hash_expected and hash_location - 1205cfb6eeb4SYOSHIFUJI Hideaki * so we need to calculate the checksum. 1206cfb6eeb4SYOSHIFUJI Hideaki */ 1207cfb6eeb4SYOSHIFUJI Hideaki genhash = tcp_v4_do_calc_md5_hash(newhash, 1208cfb6eeb4SYOSHIFUJI Hideaki hash_expected, 1209cfb6eeb4SYOSHIFUJI Hideaki iph->saddr, iph->daddr, 1210cfb6eeb4SYOSHIFUJI Hideaki th, sk->sk_protocol, 1211cfb6eeb4SYOSHIFUJI Hideaki skb->len); 1212cfb6eeb4SYOSHIFUJI Hideaki 1213cfb6eeb4SYOSHIFUJI Hideaki if (genhash || memcmp(hash_location, newhash, 16) != 0) { 1214cfb6eeb4SYOSHIFUJI Hideaki if (net_ratelimit()) { 1215cfb6eeb4SYOSHIFUJI Hideaki printk(KERN_INFO "MD5 Hash failed for " 1216cfb6eeb4SYOSHIFUJI Hideaki "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)%s\n", 1217cfb6eeb4SYOSHIFUJI Hideaki NIPQUAD(iph->saddr), ntohs(th->source), 1218cfb6eeb4SYOSHIFUJI Hideaki NIPQUAD(iph->daddr), ntohs(th->dest), 1219cfb6eeb4SYOSHIFUJI Hideaki genhash ? " tcp_v4_calc_md5_hash failed" : ""); 1220cfb6eeb4SYOSHIFUJI Hideaki } 1221cfb6eeb4SYOSHIFUJI Hideaki return 1; 1222cfb6eeb4SYOSHIFUJI Hideaki } 1223cfb6eeb4SYOSHIFUJI Hideaki return 0; 1224cfb6eeb4SYOSHIFUJI Hideaki } 1225cfb6eeb4SYOSHIFUJI Hideaki 1226cfb6eeb4SYOSHIFUJI Hideaki #endif 1227cfb6eeb4SYOSHIFUJI Hideaki 122872a3effaSEric Dumazet struct request_sock_ops tcp_request_sock_ops __read_mostly = { 12291da177e4SLinus Torvalds .family = PF_INET, 12302e6599cbSArnaldo Carvalho de Melo .obj_size = sizeof(struct tcp_request_sock), 12311da177e4SLinus Torvalds .rtx_syn_ack = tcp_v4_send_synack, 123260236fddSArnaldo Carvalho de Melo .send_ack = tcp_v4_reqsk_send_ack, 123360236fddSArnaldo Carvalho de Melo .destructor = tcp_v4_reqsk_destructor, 12341da177e4SLinus Torvalds .send_reset = tcp_v4_send_reset, 12351da177e4SLinus Torvalds }; 12361da177e4SLinus Torvalds 1237cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 1238b6332e6cSAndrew Morton static struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { 1239cfb6eeb4SYOSHIFUJI Hideaki .md5_lookup = tcp_v4_reqsk_md5_lookup, 1240cfb6eeb4SYOSHIFUJI Hideaki }; 1241b6332e6cSAndrew Morton #endif 1242cfb6eeb4SYOSHIFUJI Hideaki 12436d6ee43eSArnaldo Carvalho de Melo static struct timewait_sock_ops tcp_timewait_sock_ops = { 12446d6ee43eSArnaldo Carvalho de Melo .twsk_obj_size = sizeof(struct tcp_timewait_sock), 12456d6ee43eSArnaldo Carvalho de Melo .twsk_unique = tcp_twsk_unique, 1246cfb6eeb4SYOSHIFUJI Hideaki .twsk_destructor= tcp_twsk_destructor, 12476d6ee43eSArnaldo Carvalho de Melo }; 12486d6ee43eSArnaldo Carvalho de Melo 12491da177e4SLinus Torvalds int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) 12501da177e4SLinus Torvalds { 12512e6599cbSArnaldo Carvalho de Melo struct inet_request_sock *ireq; 12521da177e4SLinus Torvalds struct tcp_options_received tmp_opt; 125360236fddSArnaldo Carvalho de Melo struct request_sock *req; 1254eddc9ec5SArnaldo Carvalho de Melo __be32 saddr = ip_hdr(skb)->saddr; 1255eddc9ec5SArnaldo Carvalho de Melo __be32 daddr = ip_hdr(skb)->daddr; 12561da177e4SLinus Torvalds __u32 isn = TCP_SKB_CB(skb)->when; 12571da177e4SLinus Torvalds struct dst_entry *dst = NULL; 12581da177e4SLinus Torvalds #ifdef CONFIG_SYN_COOKIES 12591da177e4SLinus Torvalds int want_cookie = 0; 12601da177e4SLinus Torvalds #else 12611da177e4SLinus Torvalds #define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */ 12621da177e4SLinus Torvalds #endif 12631da177e4SLinus Torvalds 12641da177e4SLinus Torvalds /* Never answer to SYNs send to broadcast or multicast */ 12651da177e4SLinus Torvalds if (((struct rtable *)skb->dst)->rt_flags & 12661da177e4SLinus Torvalds (RTCF_BROADCAST | RTCF_MULTICAST)) 12671da177e4SLinus Torvalds goto drop; 12681da177e4SLinus Torvalds 12691da177e4SLinus Torvalds /* TW buckets are converted to open requests without 12701da177e4SLinus Torvalds * limitations, they conserve resources and peer is 12711da177e4SLinus Torvalds * evidently real one. 12721da177e4SLinus Torvalds */ 1273463c84b9SArnaldo Carvalho de Melo if (inet_csk_reqsk_queue_is_full(sk) && !isn) { 12741da177e4SLinus Torvalds #ifdef CONFIG_SYN_COOKIES 12751da177e4SLinus Torvalds if (sysctl_tcp_syncookies) { 12761da177e4SLinus Torvalds want_cookie = 1; 12771da177e4SLinus Torvalds } else 12781da177e4SLinus Torvalds #endif 12791da177e4SLinus Torvalds goto drop; 12801da177e4SLinus Torvalds } 12811da177e4SLinus Torvalds 12821da177e4SLinus Torvalds /* Accept backlog is full. If we have already queued enough 12831da177e4SLinus Torvalds * of warm entries in syn queue, drop request. It is better than 12841da177e4SLinus Torvalds * clogging syn queue with openreqs with exponentially increasing 12851da177e4SLinus Torvalds * timeout. 12861da177e4SLinus Torvalds */ 1287463c84b9SArnaldo Carvalho de Melo if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) 12881da177e4SLinus Torvalds goto drop; 12891da177e4SLinus Torvalds 129060236fddSArnaldo Carvalho de Melo req = reqsk_alloc(&tcp_request_sock_ops); 12911da177e4SLinus Torvalds if (!req) 12921da177e4SLinus Torvalds goto drop; 12931da177e4SLinus Torvalds 1294cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 1295cfb6eeb4SYOSHIFUJI Hideaki tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops; 1296cfb6eeb4SYOSHIFUJI Hideaki #endif 1297cfb6eeb4SYOSHIFUJI Hideaki 12981da177e4SLinus Torvalds tcp_clear_options(&tmp_opt); 12991da177e4SLinus Torvalds tmp_opt.mss_clamp = 536; 13001da177e4SLinus Torvalds tmp_opt.user_mss = tcp_sk(sk)->rx_opt.user_mss; 13011da177e4SLinus Torvalds 13021da177e4SLinus Torvalds tcp_parse_options(skb, &tmp_opt, 0); 13031da177e4SLinus Torvalds 13041da177e4SLinus Torvalds if (want_cookie) { 13051da177e4SLinus Torvalds tcp_clear_options(&tmp_opt); 13061da177e4SLinus Torvalds tmp_opt.saw_tstamp = 0; 13071da177e4SLinus Torvalds } 13081da177e4SLinus Torvalds 13091da177e4SLinus Torvalds if (tmp_opt.saw_tstamp && !tmp_opt.rcv_tsval) { 13101da177e4SLinus Torvalds /* Some OSes (unknown ones, but I see them on web server, which 13111da177e4SLinus Torvalds * contains information interesting only for windows' 13121da177e4SLinus Torvalds * users) do not send their stamp in SYN. It is easy case. 13131da177e4SLinus Torvalds * We simply do not advertise TS support. 13141da177e4SLinus Torvalds */ 13151da177e4SLinus Torvalds tmp_opt.saw_tstamp = 0; 13161da177e4SLinus Torvalds tmp_opt.tstamp_ok = 0; 13171da177e4SLinus Torvalds } 13181da177e4SLinus Torvalds tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; 13191da177e4SLinus Torvalds 13201da177e4SLinus Torvalds tcp_openreq_init(req, &tmp_opt, skb); 13211da177e4SLinus Torvalds 13224237c75cSVenkat Yekkirala if (security_inet_conn_request(sk, skb, req)) 13234237c75cSVenkat Yekkirala goto drop_and_free; 13244237c75cSVenkat Yekkirala 13252e6599cbSArnaldo Carvalho de Melo ireq = inet_rsk(req); 13262e6599cbSArnaldo Carvalho de Melo ireq->loc_addr = daddr; 13272e6599cbSArnaldo Carvalho de Melo ireq->rmt_addr = saddr; 13282e6599cbSArnaldo Carvalho de Melo ireq->opt = tcp_v4_save_options(sk, skb); 13291da177e4SLinus Torvalds if (!want_cookie) 13301da177e4SLinus Torvalds TCP_ECN_create_request(req, skb->h.th); 13311da177e4SLinus Torvalds 13321da177e4SLinus Torvalds if (want_cookie) { 13331da177e4SLinus Torvalds #ifdef CONFIG_SYN_COOKIES 13341da177e4SLinus Torvalds syn_flood_warning(skb); 13351da177e4SLinus Torvalds #endif 13361da177e4SLinus Torvalds isn = cookie_v4_init_sequence(sk, skb, &req->mss); 13371da177e4SLinus Torvalds } else if (!isn) { 13381da177e4SLinus Torvalds struct inet_peer *peer = NULL; 13391da177e4SLinus Torvalds 13401da177e4SLinus Torvalds /* VJ's idea. We save last timestamp seen 13411da177e4SLinus Torvalds * from the destination in peer table, when entering 13421da177e4SLinus Torvalds * state TIME-WAIT, and check against it before 13431da177e4SLinus Torvalds * accepting new connection request. 13441da177e4SLinus Torvalds * 13451da177e4SLinus Torvalds * If "isn" is not zero, this request hit alive 13461da177e4SLinus Torvalds * timewait bucket, so that all the necessary checks 13471da177e4SLinus Torvalds * are made in the function processing timewait state. 13481da177e4SLinus Torvalds */ 13491da177e4SLinus Torvalds if (tmp_opt.saw_tstamp && 1350295ff7edSArnaldo Carvalho de Melo tcp_death_row.sysctl_tw_recycle && 1351463c84b9SArnaldo Carvalho de Melo (dst = inet_csk_route_req(sk, req)) != NULL && 13521da177e4SLinus Torvalds (peer = rt_get_peer((struct rtable *)dst)) != NULL && 13531da177e4SLinus Torvalds peer->v4daddr == saddr) { 13549d729f72SJames Morris if (get_seconds() < peer->tcp_ts_stamp + TCP_PAWS_MSL && 13551da177e4SLinus Torvalds (s32)(peer->tcp_ts - req->ts_recent) > 13561da177e4SLinus Torvalds TCP_PAWS_WINDOW) { 13571da177e4SLinus Torvalds NET_INC_STATS_BH(LINUX_MIB_PAWSPASSIVEREJECTED); 13581da177e4SLinus Torvalds dst_release(dst); 13591da177e4SLinus Torvalds goto drop_and_free; 13601da177e4SLinus Torvalds } 13611da177e4SLinus Torvalds } 13621da177e4SLinus Torvalds /* Kill the following clause, if you dislike this way. */ 13631da177e4SLinus Torvalds else if (!sysctl_tcp_syncookies && 1364463c84b9SArnaldo Carvalho de Melo (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < 13651da177e4SLinus Torvalds (sysctl_max_syn_backlog >> 2)) && 13661da177e4SLinus Torvalds (!peer || !peer->tcp_ts_stamp) && 13671da177e4SLinus Torvalds (!dst || !dst_metric(dst, RTAX_RTT))) { 13681da177e4SLinus Torvalds /* Without syncookies last quarter of 13691da177e4SLinus Torvalds * backlog is filled with destinations, 13701da177e4SLinus Torvalds * proven to be alive. 13711da177e4SLinus Torvalds * It means that we continue to communicate 13721da177e4SLinus Torvalds * to destinations, already remembered 13731da177e4SLinus Torvalds * to the moment of synflood. 13741da177e4SLinus Torvalds */ 137564ce2073SPatrick McHardy LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open " 137664ce2073SPatrick McHardy "request from %u.%u.%u.%u/%u\n", 13771da177e4SLinus Torvalds NIPQUAD(saddr), 137864ce2073SPatrick McHardy ntohs(skb->h.th->source)); 13791da177e4SLinus Torvalds dst_release(dst); 13801da177e4SLinus Torvalds goto drop_and_free; 13811da177e4SLinus Torvalds } 13821da177e4SLinus Torvalds 1383a94f723dSGerrit Renker isn = tcp_v4_init_sequence(skb); 13841da177e4SLinus Torvalds } 13852e6599cbSArnaldo Carvalho de Melo tcp_rsk(req)->snt_isn = isn; 13861da177e4SLinus Torvalds 13871da177e4SLinus Torvalds if (tcp_v4_send_synack(sk, req, dst)) 13881da177e4SLinus Torvalds goto drop_and_free; 13891da177e4SLinus Torvalds 13901da177e4SLinus Torvalds if (want_cookie) { 139160236fddSArnaldo Carvalho de Melo reqsk_free(req); 13921da177e4SLinus Torvalds } else { 13933f421baaSArnaldo Carvalho de Melo inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); 13941da177e4SLinus Torvalds } 13951da177e4SLinus Torvalds return 0; 13961da177e4SLinus Torvalds 13971da177e4SLinus Torvalds drop_and_free: 139860236fddSArnaldo Carvalho de Melo reqsk_free(req); 13991da177e4SLinus Torvalds drop: 14001da177e4SLinus Torvalds return 0; 14011da177e4SLinus Torvalds } 14021da177e4SLinus Torvalds 14031da177e4SLinus Torvalds 14041da177e4SLinus Torvalds /* 14051da177e4SLinus Torvalds * The three way handshake has completed - we got a valid synack - 14061da177e4SLinus Torvalds * now create the new socket. 14071da177e4SLinus Torvalds */ 14081da177e4SLinus Torvalds struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, 140960236fddSArnaldo Carvalho de Melo struct request_sock *req, 14101da177e4SLinus Torvalds struct dst_entry *dst) 14111da177e4SLinus Torvalds { 14122e6599cbSArnaldo Carvalho de Melo struct inet_request_sock *ireq; 14131da177e4SLinus Torvalds struct inet_sock *newinet; 14141da177e4SLinus Torvalds struct tcp_sock *newtp; 14151da177e4SLinus Torvalds struct sock *newsk; 1416cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 1417cfb6eeb4SYOSHIFUJI Hideaki struct tcp_md5sig_key *key; 1418cfb6eeb4SYOSHIFUJI Hideaki #endif 14191da177e4SLinus Torvalds 14201da177e4SLinus Torvalds if (sk_acceptq_is_full(sk)) 14211da177e4SLinus Torvalds goto exit_overflow; 14221da177e4SLinus Torvalds 1423463c84b9SArnaldo Carvalho de Melo if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL) 14241da177e4SLinus Torvalds goto exit; 14251da177e4SLinus Torvalds 14261da177e4SLinus Torvalds newsk = tcp_create_openreq_child(sk, req, skb); 14271da177e4SLinus Torvalds if (!newsk) 14281da177e4SLinus Torvalds goto exit; 14291da177e4SLinus Torvalds 1430bcd76111SHerbert Xu newsk->sk_gso_type = SKB_GSO_TCPV4; 14316cbb0df7SArnaldo Carvalho de Melo sk_setup_caps(newsk, dst); 14321da177e4SLinus Torvalds 14331da177e4SLinus Torvalds newtp = tcp_sk(newsk); 14341da177e4SLinus Torvalds newinet = inet_sk(newsk); 14352e6599cbSArnaldo Carvalho de Melo ireq = inet_rsk(req); 14362e6599cbSArnaldo Carvalho de Melo newinet->daddr = ireq->rmt_addr; 14372e6599cbSArnaldo Carvalho de Melo newinet->rcv_saddr = ireq->loc_addr; 14382e6599cbSArnaldo Carvalho de Melo newinet->saddr = ireq->loc_addr; 14392e6599cbSArnaldo Carvalho de Melo newinet->opt = ireq->opt; 14402e6599cbSArnaldo Carvalho de Melo ireq->opt = NULL; 1441463c84b9SArnaldo Carvalho de Melo newinet->mc_index = inet_iif(skb); 1442eddc9ec5SArnaldo Carvalho de Melo newinet->mc_ttl = ip_hdr(skb)->ttl; 1443d83d8461SArnaldo Carvalho de Melo inet_csk(newsk)->icsk_ext_hdr_len = 0; 14441da177e4SLinus Torvalds if (newinet->opt) 1445d83d8461SArnaldo Carvalho de Melo inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen; 14461da177e4SLinus Torvalds newinet->id = newtp->write_seq ^ jiffies; 14471da177e4SLinus Torvalds 14485d424d5aSJohn Heffner tcp_mtup_init(newsk); 14491da177e4SLinus Torvalds tcp_sync_mss(newsk, dst_mtu(dst)); 14501da177e4SLinus Torvalds newtp->advmss = dst_metric(dst, RTAX_ADVMSS); 14511da177e4SLinus Torvalds tcp_initialize_rcv_mss(newsk); 14521da177e4SLinus Torvalds 1453cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 1454cfb6eeb4SYOSHIFUJI Hideaki /* Copy over the MD5 key from the original socket */ 1455cfb6eeb4SYOSHIFUJI Hideaki if ((key = tcp_v4_md5_do_lookup(sk, newinet->daddr)) != NULL) { 1456cfb6eeb4SYOSHIFUJI Hideaki /* 1457cfb6eeb4SYOSHIFUJI Hideaki * We're using one, so create a matching key 1458cfb6eeb4SYOSHIFUJI Hideaki * on the newsk structure. If we fail to get 1459cfb6eeb4SYOSHIFUJI Hideaki * memory, then we end up not copying the key 1460cfb6eeb4SYOSHIFUJI Hideaki * across. Shucks. 1461cfb6eeb4SYOSHIFUJI Hideaki */ 1462f6685938SArnaldo Carvalho de Melo char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC); 1463f6685938SArnaldo Carvalho de Melo if (newkey != NULL) 1464cfb6eeb4SYOSHIFUJI Hideaki tcp_v4_md5_do_add(newsk, inet_sk(sk)->daddr, 1465cfb6eeb4SYOSHIFUJI Hideaki newkey, key->keylen); 1466cfb6eeb4SYOSHIFUJI Hideaki } 1467cfb6eeb4SYOSHIFUJI Hideaki #endif 1468cfb6eeb4SYOSHIFUJI Hideaki 1469f3f05f70SArnaldo Carvalho de Melo __inet_hash(&tcp_hashinfo, newsk, 0); 14702d8c4ce5SArnaldo Carvalho de Melo __inet_inherit_port(&tcp_hashinfo, sk, newsk); 14711da177e4SLinus Torvalds 14721da177e4SLinus Torvalds return newsk; 14731da177e4SLinus Torvalds 14741da177e4SLinus Torvalds exit_overflow: 14751da177e4SLinus Torvalds NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS); 14761da177e4SLinus Torvalds exit: 14771da177e4SLinus Torvalds NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS); 14781da177e4SLinus Torvalds dst_release(dst); 14791da177e4SLinus Torvalds return NULL; 14801da177e4SLinus Torvalds } 14811da177e4SLinus Torvalds 14821da177e4SLinus Torvalds static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) 14831da177e4SLinus Torvalds { 14841da177e4SLinus Torvalds struct tcphdr *th = skb->h.th; 1485eddc9ec5SArnaldo Carvalho de Melo const struct iphdr *iph = ip_hdr(skb); 14861da177e4SLinus Torvalds struct sock *nsk; 148760236fddSArnaldo Carvalho de Melo struct request_sock **prev; 14881da177e4SLinus Torvalds /* Find possible connection requests. */ 1489463c84b9SArnaldo Carvalho de Melo struct request_sock *req = inet_csk_search_req(sk, &prev, th->source, 14901da177e4SLinus Torvalds iph->saddr, iph->daddr); 14911da177e4SLinus Torvalds if (req) 14921da177e4SLinus Torvalds return tcp_check_req(sk, skb, req, prev); 14931da177e4SLinus Torvalds 1494eddc9ec5SArnaldo Carvalho de Melo nsk = inet_lookup_established(&tcp_hashinfo, iph->saddr, th->source, 1495eddc9ec5SArnaldo Carvalho de Melo iph->daddr, th->dest, inet_iif(skb)); 14961da177e4SLinus Torvalds 14971da177e4SLinus Torvalds if (nsk) { 14981da177e4SLinus Torvalds if (nsk->sk_state != TCP_TIME_WAIT) { 14991da177e4SLinus Torvalds bh_lock_sock(nsk); 15001da177e4SLinus Torvalds return nsk; 15011da177e4SLinus Torvalds } 15029469c7b4SYOSHIFUJI Hideaki inet_twsk_put(inet_twsk(nsk)); 15031da177e4SLinus Torvalds return NULL; 15041da177e4SLinus Torvalds } 15051da177e4SLinus Torvalds 15061da177e4SLinus Torvalds #ifdef CONFIG_SYN_COOKIES 15071da177e4SLinus Torvalds if (!th->rst && !th->syn && th->ack) 15081da177e4SLinus Torvalds sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt)); 15091da177e4SLinus Torvalds #endif 15101da177e4SLinus Torvalds return sk; 15111da177e4SLinus Torvalds } 15121da177e4SLinus Torvalds 1513b51655b9SAl Viro static __sum16 tcp_v4_checksum_init(struct sk_buff *skb) 15141da177e4SLinus Torvalds { 1515eddc9ec5SArnaldo Carvalho de Melo const struct iphdr *iph = ip_hdr(skb); 1516eddc9ec5SArnaldo Carvalho de Melo 151784fa7933SPatrick McHardy if (skb->ip_summed == CHECKSUM_COMPLETE) { 1518eddc9ec5SArnaldo Carvalho de Melo if (!tcp_v4_check(skb->len, iph->saddr, 1519eddc9ec5SArnaldo Carvalho de Melo iph->daddr, skb->csum)) { 15201da177e4SLinus Torvalds skb->ip_summed = CHECKSUM_UNNECESSARY; 1521fb286bb2SHerbert Xu return 0; 1522fb286bb2SHerbert Xu } 1523fb286bb2SHerbert Xu } 1524fb286bb2SHerbert Xu 1525eddc9ec5SArnaldo Carvalho de Melo skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, 1526fb286bb2SHerbert Xu skb->len, IPPROTO_TCP, 0); 1527fb286bb2SHerbert Xu 1528fb286bb2SHerbert Xu if (skb->len <= 76) { 1529fb286bb2SHerbert Xu return __skb_checksum_complete(skb); 15301da177e4SLinus Torvalds } 15311da177e4SLinus Torvalds return 0; 15321da177e4SLinus Torvalds } 15331da177e4SLinus Torvalds 15341da177e4SLinus Torvalds 15351da177e4SLinus Torvalds /* The socket must have it's spinlock held when we get 15361da177e4SLinus Torvalds * here. 15371da177e4SLinus Torvalds * 15381da177e4SLinus Torvalds * We have a potential double-lock case here, so even when 15391da177e4SLinus Torvalds * doing backlog processing we use the BH locking scheme. 15401da177e4SLinus Torvalds * This is because we cannot sleep with the original spinlock 15411da177e4SLinus Torvalds * held. 15421da177e4SLinus Torvalds */ 15431da177e4SLinus Torvalds int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) 15441da177e4SLinus Torvalds { 1545cfb6eeb4SYOSHIFUJI Hideaki struct sock *rsk; 1546cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 1547cfb6eeb4SYOSHIFUJI Hideaki /* 1548cfb6eeb4SYOSHIFUJI Hideaki * We really want to reject the packet as early as possible 1549cfb6eeb4SYOSHIFUJI Hideaki * if: 1550cfb6eeb4SYOSHIFUJI Hideaki * o We're expecting an MD5'd packet and this is no MD5 tcp option 1551cfb6eeb4SYOSHIFUJI Hideaki * o There is an MD5 option and we're not expecting one 1552cfb6eeb4SYOSHIFUJI Hideaki */ 1553cfb6eeb4SYOSHIFUJI Hideaki if (tcp_v4_inbound_md5_hash(sk, skb)) 1554cfb6eeb4SYOSHIFUJI Hideaki goto discard; 1555cfb6eeb4SYOSHIFUJI Hideaki #endif 1556cfb6eeb4SYOSHIFUJI Hideaki 15571da177e4SLinus Torvalds if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ 15581da177e4SLinus Torvalds TCP_CHECK_TIMER(sk); 1559cfb6eeb4SYOSHIFUJI Hideaki if (tcp_rcv_established(sk, skb, skb->h.th, skb->len)) { 1560cfb6eeb4SYOSHIFUJI Hideaki rsk = sk; 15611da177e4SLinus Torvalds goto reset; 1562cfb6eeb4SYOSHIFUJI Hideaki } 15631da177e4SLinus Torvalds TCP_CHECK_TIMER(sk); 15641da177e4SLinus Torvalds return 0; 15651da177e4SLinus Torvalds } 15661da177e4SLinus Torvalds 1567*ab6a5bb6SArnaldo Carvalho de Melo if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb)) 15681da177e4SLinus Torvalds goto csum_err; 15691da177e4SLinus Torvalds 15701da177e4SLinus Torvalds if (sk->sk_state == TCP_LISTEN) { 15711da177e4SLinus Torvalds struct sock *nsk = tcp_v4_hnd_req(sk, skb); 15721da177e4SLinus Torvalds if (!nsk) 15731da177e4SLinus Torvalds goto discard; 15741da177e4SLinus Torvalds 15751da177e4SLinus Torvalds if (nsk != sk) { 1576cfb6eeb4SYOSHIFUJI Hideaki if (tcp_child_process(sk, nsk, skb)) { 1577cfb6eeb4SYOSHIFUJI Hideaki rsk = nsk; 15781da177e4SLinus Torvalds goto reset; 1579cfb6eeb4SYOSHIFUJI Hideaki } 15801da177e4SLinus Torvalds return 0; 15811da177e4SLinus Torvalds } 15821da177e4SLinus Torvalds } 15831da177e4SLinus Torvalds 15841da177e4SLinus Torvalds TCP_CHECK_TIMER(sk); 1585cfb6eeb4SYOSHIFUJI Hideaki if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len)) { 1586cfb6eeb4SYOSHIFUJI Hideaki rsk = sk; 15871da177e4SLinus Torvalds goto reset; 1588cfb6eeb4SYOSHIFUJI Hideaki } 15891da177e4SLinus Torvalds TCP_CHECK_TIMER(sk); 15901da177e4SLinus Torvalds return 0; 15911da177e4SLinus Torvalds 15921da177e4SLinus Torvalds reset: 1593cfb6eeb4SYOSHIFUJI Hideaki tcp_v4_send_reset(rsk, skb); 15941da177e4SLinus Torvalds discard: 15951da177e4SLinus Torvalds kfree_skb(skb); 15961da177e4SLinus Torvalds /* Be careful here. If this function gets more complicated and 15971da177e4SLinus Torvalds * gcc suffers from register pressure on the x86, sk (in %ebx) 15981da177e4SLinus Torvalds * might be destroyed here. This current version compiles correctly, 15991da177e4SLinus Torvalds * but you have been warned. 16001da177e4SLinus Torvalds */ 16011da177e4SLinus Torvalds return 0; 16021da177e4SLinus Torvalds 16031da177e4SLinus Torvalds csum_err: 16041da177e4SLinus Torvalds TCP_INC_STATS_BH(TCP_MIB_INERRS); 16051da177e4SLinus Torvalds goto discard; 16061da177e4SLinus Torvalds } 16071da177e4SLinus Torvalds 16081da177e4SLinus Torvalds /* 16091da177e4SLinus Torvalds * From tcp_input.c 16101da177e4SLinus Torvalds */ 16111da177e4SLinus Torvalds 16121da177e4SLinus Torvalds int tcp_v4_rcv(struct sk_buff *skb) 16131da177e4SLinus Torvalds { 1614eddc9ec5SArnaldo Carvalho de Melo const struct iphdr *iph; 16151da177e4SLinus Torvalds struct tcphdr *th; 16161da177e4SLinus Torvalds struct sock *sk; 16171da177e4SLinus Torvalds int ret; 16181da177e4SLinus Torvalds 16191da177e4SLinus Torvalds if (skb->pkt_type != PACKET_HOST) 16201da177e4SLinus Torvalds goto discard_it; 16211da177e4SLinus Torvalds 16221da177e4SLinus Torvalds /* Count it even if it's bad */ 16231da177e4SLinus Torvalds TCP_INC_STATS_BH(TCP_MIB_INSEGS); 16241da177e4SLinus Torvalds 16251da177e4SLinus Torvalds if (!pskb_may_pull(skb, sizeof(struct tcphdr))) 16261da177e4SLinus Torvalds goto discard_it; 16271da177e4SLinus Torvalds 16281da177e4SLinus Torvalds th = skb->h.th; 16291da177e4SLinus Torvalds 16301da177e4SLinus Torvalds if (th->doff < sizeof(struct tcphdr) / 4) 16311da177e4SLinus Torvalds goto bad_packet; 16321da177e4SLinus Torvalds if (!pskb_may_pull(skb, th->doff * 4)) 16331da177e4SLinus Torvalds goto discard_it; 16341da177e4SLinus Torvalds 16351da177e4SLinus Torvalds /* An explanation is required here, I think. 16361da177e4SLinus Torvalds * Packet length and doff are validated by header prediction, 1637caa20d9aSStephen Hemminger * provided case of th->doff==0 is eliminated. 16381da177e4SLinus Torvalds * So, we defer the checks. */ 16391da177e4SLinus Torvalds if ((skb->ip_summed != CHECKSUM_UNNECESSARY && 1640fb286bb2SHerbert Xu tcp_v4_checksum_init(skb))) 16411da177e4SLinus Torvalds goto bad_packet; 16421da177e4SLinus Torvalds 16431da177e4SLinus Torvalds th = skb->h.th; 1644eddc9ec5SArnaldo Carvalho de Melo iph = ip_hdr(skb); 16451da177e4SLinus Torvalds TCP_SKB_CB(skb)->seq = ntohl(th->seq); 16461da177e4SLinus Torvalds TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + 16471da177e4SLinus Torvalds skb->len - th->doff * 4); 16481da177e4SLinus Torvalds TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); 16491da177e4SLinus Torvalds TCP_SKB_CB(skb)->when = 0; 1650eddc9ec5SArnaldo Carvalho de Melo TCP_SKB_CB(skb)->flags = iph->tos; 16511da177e4SLinus Torvalds TCP_SKB_CB(skb)->sacked = 0; 16521da177e4SLinus Torvalds 1653eddc9ec5SArnaldo Carvalho de Melo sk = __inet_lookup(&tcp_hashinfo, iph->saddr, th->source, 1654eddc9ec5SArnaldo Carvalho de Melo iph->daddr, th->dest, inet_iif(skb)); 16551da177e4SLinus Torvalds if (!sk) 16561da177e4SLinus Torvalds goto no_tcp_socket; 16571da177e4SLinus Torvalds 16581da177e4SLinus Torvalds process: 16591da177e4SLinus Torvalds if (sk->sk_state == TCP_TIME_WAIT) 16601da177e4SLinus Torvalds goto do_time_wait; 16611da177e4SLinus Torvalds 16621da177e4SLinus Torvalds if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) 16631da177e4SLinus Torvalds goto discard_and_relse; 1664b59c2701SPatrick McHardy nf_reset(skb); 16651da177e4SLinus Torvalds 1666fda9ef5dSDmitry Mishin if (sk_filter(sk, skb)) 16671da177e4SLinus Torvalds goto discard_and_relse; 16681da177e4SLinus Torvalds 16691da177e4SLinus Torvalds skb->dev = NULL; 16701da177e4SLinus Torvalds 1671c6366184SIngo Molnar bh_lock_sock_nested(sk); 16721da177e4SLinus Torvalds ret = 0; 16731da177e4SLinus Torvalds if (!sock_owned_by_user(sk)) { 16741a2449a8SChris Leech #ifdef CONFIG_NET_DMA 16751a2449a8SChris Leech struct tcp_sock *tp = tcp_sk(sk); 16761a2449a8SChris Leech if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list) 16771a2449a8SChris Leech tp->ucopy.dma_chan = get_softnet_dma(); 16781a2449a8SChris Leech if (tp->ucopy.dma_chan) 16791a2449a8SChris Leech ret = tcp_v4_do_rcv(sk, skb); 16801a2449a8SChris Leech else 16811a2449a8SChris Leech #endif 16821a2449a8SChris Leech { 16831da177e4SLinus Torvalds if (!tcp_prequeue(sk, skb)) 16841da177e4SLinus Torvalds ret = tcp_v4_do_rcv(sk, skb); 16851a2449a8SChris Leech } 16861da177e4SLinus Torvalds } else 16871da177e4SLinus Torvalds sk_add_backlog(sk, skb); 16881da177e4SLinus Torvalds bh_unlock_sock(sk); 16891da177e4SLinus Torvalds 16901da177e4SLinus Torvalds sock_put(sk); 16911da177e4SLinus Torvalds 16921da177e4SLinus Torvalds return ret; 16931da177e4SLinus Torvalds 16941da177e4SLinus Torvalds no_tcp_socket: 16951da177e4SLinus Torvalds if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) 16961da177e4SLinus Torvalds goto discard_it; 16971da177e4SLinus Torvalds 16981da177e4SLinus Torvalds if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) { 16991da177e4SLinus Torvalds bad_packet: 17001da177e4SLinus Torvalds TCP_INC_STATS_BH(TCP_MIB_INERRS); 17011da177e4SLinus Torvalds } else { 1702cfb6eeb4SYOSHIFUJI Hideaki tcp_v4_send_reset(NULL, skb); 17031da177e4SLinus Torvalds } 17041da177e4SLinus Torvalds 17051da177e4SLinus Torvalds discard_it: 17061da177e4SLinus Torvalds /* Discard frame. */ 17071da177e4SLinus Torvalds kfree_skb(skb); 17081da177e4SLinus Torvalds return 0; 17091da177e4SLinus Torvalds 17101da177e4SLinus Torvalds discard_and_relse: 17111da177e4SLinus Torvalds sock_put(sk); 17121da177e4SLinus Torvalds goto discard_it; 17131da177e4SLinus Torvalds 17141da177e4SLinus Torvalds do_time_wait: 17151da177e4SLinus Torvalds if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { 17169469c7b4SYOSHIFUJI Hideaki inet_twsk_put(inet_twsk(sk)); 17171da177e4SLinus Torvalds goto discard_it; 17181da177e4SLinus Torvalds } 17191da177e4SLinus Torvalds 17201da177e4SLinus Torvalds if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) { 17211da177e4SLinus Torvalds TCP_INC_STATS_BH(TCP_MIB_INERRS); 17229469c7b4SYOSHIFUJI Hideaki inet_twsk_put(inet_twsk(sk)); 17231da177e4SLinus Torvalds goto discard_it; 17241da177e4SLinus Torvalds } 17259469c7b4SYOSHIFUJI Hideaki switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { 17261da177e4SLinus Torvalds case TCP_TW_SYN: { 172733b62231SArnaldo Carvalho de Melo struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo, 1728eddc9ec5SArnaldo Carvalho de Melo iph->daddr, th->dest, 1729463c84b9SArnaldo Carvalho de Melo inet_iif(skb)); 17301da177e4SLinus Torvalds if (sk2) { 17319469c7b4SYOSHIFUJI Hideaki inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row); 17329469c7b4SYOSHIFUJI Hideaki inet_twsk_put(inet_twsk(sk)); 17331da177e4SLinus Torvalds sk = sk2; 17341da177e4SLinus Torvalds goto process; 17351da177e4SLinus Torvalds } 17361da177e4SLinus Torvalds /* Fall through to ACK */ 17371da177e4SLinus Torvalds } 17381da177e4SLinus Torvalds case TCP_TW_ACK: 17391da177e4SLinus Torvalds tcp_v4_timewait_ack(sk, skb); 17401da177e4SLinus Torvalds break; 17411da177e4SLinus Torvalds case TCP_TW_RST: 17421da177e4SLinus Torvalds goto no_tcp_socket; 17431da177e4SLinus Torvalds case TCP_TW_SUCCESS:; 17441da177e4SLinus Torvalds } 17451da177e4SLinus Torvalds goto discard_it; 17461da177e4SLinus Torvalds } 17471da177e4SLinus Torvalds 17481da177e4SLinus Torvalds /* VJ's idea. Save last timestamp seen from this destination 17491da177e4SLinus Torvalds * and hold it at least for normal timewait interval to use for duplicate 17501da177e4SLinus Torvalds * segment detection in subsequent connections, before they enter synchronized 17511da177e4SLinus Torvalds * state. 17521da177e4SLinus Torvalds */ 17531da177e4SLinus Torvalds 17541da177e4SLinus Torvalds int tcp_v4_remember_stamp(struct sock *sk) 17551da177e4SLinus Torvalds { 17561da177e4SLinus Torvalds struct inet_sock *inet = inet_sk(sk); 17571da177e4SLinus Torvalds struct tcp_sock *tp = tcp_sk(sk); 17581da177e4SLinus Torvalds struct rtable *rt = (struct rtable *)__sk_dst_get(sk); 17591da177e4SLinus Torvalds struct inet_peer *peer = NULL; 17601da177e4SLinus Torvalds int release_it = 0; 17611da177e4SLinus Torvalds 17621da177e4SLinus Torvalds if (!rt || rt->rt_dst != inet->daddr) { 17631da177e4SLinus Torvalds peer = inet_getpeer(inet->daddr, 1); 17641da177e4SLinus Torvalds release_it = 1; 17651da177e4SLinus Torvalds } else { 17661da177e4SLinus Torvalds if (!rt->peer) 17671da177e4SLinus Torvalds rt_bind_peer(rt, 1); 17681da177e4SLinus Torvalds peer = rt->peer; 17691da177e4SLinus Torvalds } 17701da177e4SLinus Torvalds 17711da177e4SLinus Torvalds if (peer) { 17721da177e4SLinus Torvalds if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 || 17739d729f72SJames Morris (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() && 17741da177e4SLinus Torvalds peer->tcp_ts_stamp <= tp->rx_opt.ts_recent_stamp)) { 17751da177e4SLinus Torvalds peer->tcp_ts_stamp = tp->rx_opt.ts_recent_stamp; 17761da177e4SLinus Torvalds peer->tcp_ts = tp->rx_opt.ts_recent; 17771da177e4SLinus Torvalds } 17781da177e4SLinus Torvalds if (release_it) 17791da177e4SLinus Torvalds inet_putpeer(peer); 17801da177e4SLinus Torvalds return 1; 17811da177e4SLinus Torvalds } 17821da177e4SLinus Torvalds 17831da177e4SLinus Torvalds return 0; 17841da177e4SLinus Torvalds } 17851da177e4SLinus Torvalds 17868feaf0c0SArnaldo Carvalho de Melo int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw) 17871da177e4SLinus Torvalds { 17888feaf0c0SArnaldo Carvalho de Melo struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1); 17891da177e4SLinus Torvalds 17901da177e4SLinus Torvalds if (peer) { 17918feaf0c0SArnaldo Carvalho de Melo const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); 17928feaf0c0SArnaldo Carvalho de Melo 17938feaf0c0SArnaldo Carvalho de Melo if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 || 17949d729f72SJames Morris (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() && 17958feaf0c0SArnaldo Carvalho de Melo peer->tcp_ts_stamp <= tcptw->tw_ts_recent_stamp)) { 17968feaf0c0SArnaldo Carvalho de Melo peer->tcp_ts_stamp = tcptw->tw_ts_recent_stamp; 17978feaf0c0SArnaldo Carvalho de Melo peer->tcp_ts = tcptw->tw_ts_recent; 17981da177e4SLinus Torvalds } 17991da177e4SLinus Torvalds inet_putpeer(peer); 18001da177e4SLinus Torvalds return 1; 18011da177e4SLinus Torvalds } 18021da177e4SLinus Torvalds 18031da177e4SLinus Torvalds return 0; 18041da177e4SLinus Torvalds } 18051da177e4SLinus Torvalds 18068292a17aSArnaldo Carvalho de Melo struct inet_connection_sock_af_ops ipv4_specific = { 18071da177e4SLinus Torvalds .queue_xmit = ip_queue_xmit, 18081da177e4SLinus Torvalds .send_check = tcp_v4_send_check, 180932519f11SArnaldo Carvalho de Melo .rebuild_header = inet_sk_rebuild_header, 18101da177e4SLinus Torvalds .conn_request = tcp_v4_conn_request, 18111da177e4SLinus Torvalds .syn_recv_sock = tcp_v4_syn_recv_sock, 18121da177e4SLinus Torvalds .remember_stamp = tcp_v4_remember_stamp, 18131da177e4SLinus Torvalds .net_header_len = sizeof(struct iphdr), 18141da177e4SLinus Torvalds .setsockopt = ip_setsockopt, 18151da177e4SLinus Torvalds .getsockopt = ip_getsockopt, 1816543d9cfeSArnaldo Carvalho de Melo .addr2sockaddr = inet_csk_addr2sockaddr, 1817543d9cfeSArnaldo Carvalho de Melo .sockaddr_len = sizeof(struct sockaddr_in), 18183fdadf7dSDmitry Mishin #ifdef CONFIG_COMPAT 18193fdadf7dSDmitry Mishin .compat_setsockopt = compat_ip_setsockopt, 18203fdadf7dSDmitry Mishin .compat_getsockopt = compat_ip_getsockopt, 18213fdadf7dSDmitry Mishin #endif 18221da177e4SLinus Torvalds }; 18231da177e4SLinus Torvalds 1824cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 1825b6332e6cSAndrew Morton static struct tcp_sock_af_ops tcp_sock_ipv4_specific = { 1826cfb6eeb4SYOSHIFUJI Hideaki .md5_lookup = tcp_v4_md5_lookup, 1827cfb6eeb4SYOSHIFUJI Hideaki .calc_md5_hash = tcp_v4_calc_md5_hash, 1828cfb6eeb4SYOSHIFUJI Hideaki .md5_add = tcp_v4_md5_add_func, 1829cfb6eeb4SYOSHIFUJI Hideaki .md5_parse = tcp_v4_parse_md5_keys, 1830cfb6eeb4SYOSHIFUJI Hideaki }; 1831b6332e6cSAndrew Morton #endif 1832cfb6eeb4SYOSHIFUJI Hideaki 18331da177e4SLinus Torvalds /* NOTE: A lot of things set to zero explicitly by call to 18341da177e4SLinus Torvalds * sk_alloc() so need not be done here. 18351da177e4SLinus Torvalds */ 18361da177e4SLinus Torvalds static int tcp_v4_init_sock(struct sock *sk) 18371da177e4SLinus Torvalds { 18386687e988SArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(sk); 18391da177e4SLinus Torvalds struct tcp_sock *tp = tcp_sk(sk); 18401da177e4SLinus Torvalds 18411da177e4SLinus Torvalds skb_queue_head_init(&tp->out_of_order_queue); 18421da177e4SLinus Torvalds tcp_init_xmit_timers(sk); 18431da177e4SLinus Torvalds tcp_prequeue_init(tp); 18441da177e4SLinus Torvalds 18456687e988SArnaldo Carvalho de Melo icsk->icsk_rto = TCP_TIMEOUT_INIT; 18461da177e4SLinus Torvalds tp->mdev = TCP_TIMEOUT_INIT; 18471da177e4SLinus Torvalds 18481da177e4SLinus Torvalds /* So many TCP implementations out there (incorrectly) count the 18491da177e4SLinus Torvalds * initial SYN frame in their delayed-ACK and congestion control 18501da177e4SLinus Torvalds * algorithms that we must have the following bandaid to talk 18511da177e4SLinus Torvalds * efficiently to them. -DaveM 18521da177e4SLinus Torvalds */ 18531da177e4SLinus Torvalds tp->snd_cwnd = 2; 18541da177e4SLinus Torvalds 18551da177e4SLinus Torvalds /* See draft-stevens-tcpca-spec-01 for discussion of the 18561da177e4SLinus Torvalds * initialization of these values. 18571da177e4SLinus Torvalds */ 18581da177e4SLinus Torvalds tp->snd_ssthresh = 0x7fffffff; /* Infinity */ 18591da177e4SLinus Torvalds tp->snd_cwnd_clamp = ~0; 1860c1b4a7e6SDavid S. Miller tp->mss_cache = 536; 18611da177e4SLinus Torvalds 18621da177e4SLinus Torvalds tp->reordering = sysctl_tcp_reordering; 18636687e988SArnaldo Carvalho de Melo icsk->icsk_ca_ops = &tcp_init_congestion_ops; 18641da177e4SLinus Torvalds 18651da177e4SLinus Torvalds sk->sk_state = TCP_CLOSE; 18661da177e4SLinus Torvalds 18671da177e4SLinus Torvalds sk->sk_write_space = sk_stream_write_space; 18681da177e4SLinus Torvalds sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); 18691da177e4SLinus Torvalds 18708292a17aSArnaldo Carvalho de Melo icsk->icsk_af_ops = &ipv4_specific; 1871d83d8461SArnaldo Carvalho de Melo icsk->icsk_sync_mss = tcp_sync_mss; 1872cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 1873cfb6eeb4SYOSHIFUJI Hideaki tp->af_specific = &tcp_sock_ipv4_specific; 1874cfb6eeb4SYOSHIFUJI Hideaki #endif 18751da177e4SLinus Torvalds 18761da177e4SLinus Torvalds sk->sk_sndbuf = sysctl_tcp_wmem[1]; 18771da177e4SLinus Torvalds sk->sk_rcvbuf = sysctl_tcp_rmem[1]; 18781da177e4SLinus Torvalds 18791da177e4SLinus Torvalds atomic_inc(&tcp_sockets_allocated); 18801da177e4SLinus Torvalds 18811da177e4SLinus Torvalds return 0; 18821da177e4SLinus Torvalds } 18831da177e4SLinus Torvalds 18841da177e4SLinus Torvalds int tcp_v4_destroy_sock(struct sock *sk) 18851da177e4SLinus Torvalds { 18861da177e4SLinus Torvalds struct tcp_sock *tp = tcp_sk(sk); 18871da177e4SLinus Torvalds 18881da177e4SLinus Torvalds tcp_clear_xmit_timers(sk); 18891da177e4SLinus Torvalds 18906687e988SArnaldo Carvalho de Melo tcp_cleanup_congestion_control(sk); 1891317a76f9SStephen Hemminger 18921da177e4SLinus Torvalds /* Cleanup up the write buffer. */ 1893fe067e8aSDavid S. Miller tcp_write_queue_purge(sk); 18941da177e4SLinus Torvalds 18951da177e4SLinus Torvalds /* Cleans up our, hopefully empty, out_of_order_queue. */ 18961da177e4SLinus Torvalds __skb_queue_purge(&tp->out_of_order_queue); 18971da177e4SLinus Torvalds 1898cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 1899cfb6eeb4SYOSHIFUJI Hideaki /* Clean up the MD5 key list, if any */ 1900cfb6eeb4SYOSHIFUJI Hideaki if (tp->md5sig_info) { 1901cfb6eeb4SYOSHIFUJI Hideaki tcp_v4_clear_md5_list(sk); 1902cfb6eeb4SYOSHIFUJI Hideaki kfree(tp->md5sig_info); 1903cfb6eeb4SYOSHIFUJI Hideaki tp->md5sig_info = NULL; 1904cfb6eeb4SYOSHIFUJI Hideaki } 1905cfb6eeb4SYOSHIFUJI Hideaki #endif 1906cfb6eeb4SYOSHIFUJI Hideaki 19071a2449a8SChris Leech #ifdef CONFIG_NET_DMA 19081a2449a8SChris Leech /* Cleans up our sk_async_wait_queue */ 19091a2449a8SChris Leech __skb_queue_purge(&sk->sk_async_wait_queue); 19101a2449a8SChris Leech #endif 19111a2449a8SChris Leech 19121da177e4SLinus Torvalds /* Clean prequeue, it must be empty really */ 19131da177e4SLinus Torvalds __skb_queue_purge(&tp->ucopy.prequeue); 19141da177e4SLinus Torvalds 19151da177e4SLinus Torvalds /* Clean up a referenced TCP bind bucket. */ 1916463c84b9SArnaldo Carvalho de Melo if (inet_csk(sk)->icsk_bind_hash) 19172d8c4ce5SArnaldo Carvalho de Melo inet_put_port(&tcp_hashinfo, sk); 19181da177e4SLinus Torvalds 19191da177e4SLinus Torvalds /* 19201da177e4SLinus Torvalds * If sendmsg cached page exists, toss it. 19211da177e4SLinus Torvalds */ 19221da177e4SLinus Torvalds if (sk->sk_sndmsg_page) { 19231da177e4SLinus Torvalds __free_page(sk->sk_sndmsg_page); 19241da177e4SLinus Torvalds sk->sk_sndmsg_page = NULL; 19251da177e4SLinus Torvalds } 19261da177e4SLinus Torvalds 19271da177e4SLinus Torvalds atomic_dec(&tcp_sockets_allocated); 19281da177e4SLinus Torvalds 19291da177e4SLinus Torvalds return 0; 19301da177e4SLinus Torvalds } 19311da177e4SLinus Torvalds 19321da177e4SLinus Torvalds EXPORT_SYMBOL(tcp_v4_destroy_sock); 19331da177e4SLinus Torvalds 19341da177e4SLinus Torvalds #ifdef CONFIG_PROC_FS 19351da177e4SLinus Torvalds /* Proc filesystem TCP sock list dumping. */ 19361da177e4SLinus Torvalds 19378feaf0c0SArnaldo Carvalho de Melo static inline struct inet_timewait_sock *tw_head(struct hlist_head *head) 19381da177e4SLinus Torvalds { 19391da177e4SLinus Torvalds return hlist_empty(head) ? NULL : 19408feaf0c0SArnaldo Carvalho de Melo list_entry(head->first, struct inet_timewait_sock, tw_node); 19411da177e4SLinus Torvalds } 19421da177e4SLinus Torvalds 19438feaf0c0SArnaldo Carvalho de Melo static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw) 19441da177e4SLinus Torvalds { 19451da177e4SLinus Torvalds return tw->tw_node.next ? 19461da177e4SLinus Torvalds hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL; 19471da177e4SLinus Torvalds } 19481da177e4SLinus Torvalds 19491da177e4SLinus Torvalds static void *listening_get_next(struct seq_file *seq, void *cur) 19501da177e4SLinus Torvalds { 1951463c84b9SArnaldo Carvalho de Melo struct inet_connection_sock *icsk; 19521da177e4SLinus Torvalds struct hlist_node *node; 19531da177e4SLinus Torvalds struct sock *sk = cur; 19541da177e4SLinus Torvalds struct tcp_iter_state* st = seq->private; 19551da177e4SLinus Torvalds 19561da177e4SLinus Torvalds if (!sk) { 19571da177e4SLinus Torvalds st->bucket = 0; 19586e04e021SArnaldo Carvalho de Melo sk = sk_head(&tcp_hashinfo.listening_hash[0]); 19591da177e4SLinus Torvalds goto get_sk; 19601da177e4SLinus Torvalds } 19611da177e4SLinus Torvalds 19621da177e4SLinus Torvalds ++st->num; 19631da177e4SLinus Torvalds 19641da177e4SLinus Torvalds if (st->state == TCP_SEQ_STATE_OPENREQ) { 196560236fddSArnaldo Carvalho de Melo struct request_sock *req = cur; 19661da177e4SLinus Torvalds 1967463c84b9SArnaldo Carvalho de Melo icsk = inet_csk(st->syn_wait_sk); 19681da177e4SLinus Torvalds req = req->dl_next; 19691da177e4SLinus Torvalds while (1) { 19701da177e4SLinus Torvalds while (req) { 197160236fddSArnaldo Carvalho de Melo if (req->rsk_ops->family == st->family) { 19721da177e4SLinus Torvalds cur = req; 19731da177e4SLinus Torvalds goto out; 19741da177e4SLinus Torvalds } 19751da177e4SLinus Torvalds req = req->dl_next; 19761da177e4SLinus Torvalds } 197772a3effaSEric Dumazet if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries) 19781da177e4SLinus Torvalds break; 19791da177e4SLinus Torvalds get_req: 1980463c84b9SArnaldo Carvalho de Melo req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket]; 19811da177e4SLinus Torvalds } 19821da177e4SLinus Torvalds sk = sk_next(st->syn_wait_sk); 19831da177e4SLinus Torvalds st->state = TCP_SEQ_STATE_LISTENING; 1984463c84b9SArnaldo Carvalho de Melo read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); 19851da177e4SLinus Torvalds } else { 1986463c84b9SArnaldo Carvalho de Melo icsk = inet_csk(sk); 1987463c84b9SArnaldo Carvalho de Melo read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); 1988463c84b9SArnaldo Carvalho de Melo if (reqsk_queue_len(&icsk->icsk_accept_queue)) 19891da177e4SLinus Torvalds goto start_req; 1990463c84b9SArnaldo Carvalho de Melo read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); 19911da177e4SLinus Torvalds sk = sk_next(sk); 19921da177e4SLinus Torvalds } 19931da177e4SLinus Torvalds get_sk: 19941da177e4SLinus Torvalds sk_for_each_from(sk, node) { 19951da177e4SLinus Torvalds if (sk->sk_family == st->family) { 19961da177e4SLinus Torvalds cur = sk; 19971da177e4SLinus Torvalds goto out; 19981da177e4SLinus Torvalds } 1999463c84b9SArnaldo Carvalho de Melo icsk = inet_csk(sk); 2000463c84b9SArnaldo Carvalho de Melo read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); 2001463c84b9SArnaldo Carvalho de Melo if (reqsk_queue_len(&icsk->icsk_accept_queue)) { 20021da177e4SLinus Torvalds start_req: 20031da177e4SLinus Torvalds st->uid = sock_i_uid(sk); 20041da177e4SLinus Torvalds st->syn_wait_sk = sk; 20051da177e4SLinus Torvalds st->state = TCP_SEQ_STATE_OPENREQ; 20061da177e4SLinus Torvalds st->sbucket = 0; 20071da177e4SLinus Torvalds goto get_req; 20081da177e4SLinus Torvalds } 2009463c84b9SArnaldo Carvalho de Melo read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); 20101da177e4SLinus Torvalds } 20110f7ff927SArnaldo Carvalho de Melo if (++st->bucket < INET_LHTABLE_SIZE) { 20126e04e021SArnaldo Carvalho de Melo sk = sk_head(&tcp_hashinfo.listening_hash[st->bucket]); 20131da177e4SLinus Torvalds goto get_sk; 20141da177e4SLinus Torvalds } 20151da177e4SLinus Torvalds cur = NULL; 20161da177e4SLinus Torvalds out: 20171da177e4SLinus Torvalds return cur; 20181da177e4SLinus Torvalds } 20191da177e4SLinus Torvalds 20201da177e4SLinus Torvalds static void *listening_get_idx(struct seq_file *seq, loff_t *pos) 20211da177e4SLinus Torvalds { 20221da177e4SLinus Torvalds void *rc = listening_get_next(seq, NULL); 20231da177e4SLinus Torvalds 20241da177e4SLinus Torvalds while (rc && *pos) { 20251da177e4SLinus Torvalds rc = listening_get_next(seq, rc); 20261da177e4SLinus Torvalds --*pos; 20271da177e4SLinus Torvalds } 20281da177e4SLinus Torvalds return rc; 20291da177e4SLinus Torvalds } 20301da177e4SLinus Torvalds 20311da177e4SLinus Torvalds static void *established_get_first(struct seq_file *seq) 20321da177e4SLinus Torvalds { 20331da177e4SLinus Torvalds struct tcp_iter_state* st = seq->private; 20341da177e4SLinus Torvalds void *rc = NULL; 20351da177e4SLinus Torvalds 20366e04e021SArnaldo Carvalho de Melo for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) { 20371da177e4SLinus Torvalds struct sock *sk; 20381da177e4SLinus Torvalds struct hlist_node *node; 20398feaf0c0SArnaldo Carvalho de Melo struct inet_timewait_sock *tw; 20401da177e4SLinus Torvalds 20411da177e4SLinus Torvalds /* We can reschedule _before_ having picked the target: */ 20421da177e4SLinus Torvalds cond_resched_softirq(); 20431da177e4SLinus Torvalds 20446e04e021SArnaldo Carvalho de Melo read_lock(&tcp_hashinfo.ehash[st->bucket].lock); 20456e04e021SArnaldo Carvalho de Melo sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { 20461da177e4SLinus Torvalds if (sk->sk_family != st->family) { 20471da177e4SLinus Torvalds continue; 20481da177e4SLinus Torvalds } 20491da177e4SLinus Torvalds rc = sk; 20501da177e4SLinus Torvalds goto out; 20511da177e4SLinus Torvalds } 20521da177e4SLinus Torvalds st->state = TCP_SEQ_STATE_TIME_WAIT; 20538feaf0c0SArnaldo Carvalho de Melo inet_twsk_for_each(tw, node, 2054dbca9b27SEric Dumazet &tcp_hashinfo.ehash[st->bucket].twchain) { 20551da177e4SLinus Torvalds if (tw->tw_family != st->family) { 20561da177e4SLinus Torvalds continue; 20571da177e4SLinus Torvalds } 20581da177e4SLinus Torvalds rc = tw; 20591da177e4SLinus Torvalds goto out; 20601da177e4SLinus Torvalds } 20616e04e021SArnaldo Carvalho de Melo read_unlock(&tcp_hashinfo.ehash[st->bucket].lock); 20621da177e4SLinus Torvalds st->state = TCP_SEQ_STATE_ESTABLISHED; 20631da177e4SLinus Torvalds } 20641da177e4SLinus Torvalds out: 20651da177e4SLinus Torvalds return rc; 20661da177e4SLinus Torvalds } 20671da177e4SLinus Torvalds 20681da177e4SLinus Torvalds static void *established_get_next(struct seq_file *seq, void *cur) 20691da177e4SLinus Torvalds { 20701da177e4SLinus Torvalds struct sock *sk = cur; 20718feaf0c0SArnaldo Carvalho de Melo struct inet_timewait_sock *tw; 20721da177e4SLinus Torvalds struct hlist_node *node; 20731da177e4SLinus Torvalds struct tcp_iter_state* st = seq->private; 20741da177e4SLinus Torvalds 20751da177e4SLinus Torvalds ++st->num; 20761da177e4SLinus Torvalds 20771da177e4SLinus Torvalds if (st->state == TCP_SEQ_STATE_TIME_WAIT) { 20781da177e4SLinus Torvalds tw = cur; 20791da177e4SLinus Torvalds tw = tw_next(tw); 20801da177e4SLinus Torvalds get_tw: 20811da177e4SLinus Torvalds while (tw && tw->tw_family != st->family) { 20821da177e4SLinus Torvalds tw = tw_next(tw); 20831da177e4SLinus Torvalds } 20841da177e4SLinus Torvalds if (tw) { 20851da177e4SLinus Torvalds cur = tw; 20861da177e4SLinus Torvalds goto out; 20871da177e4SLinus Torvalds } 20886e04e021SArnaldo Carvalho de Melo read_unlock(&tcp_hashinfo.ehash[st->bucket].lock); 20891da177e4SLinus Torvalds st->state = TCP_SEQ_STATE_ESTABLISHED; 20901da177e4SLinus Torvalds 20911da177e4SLinus Torvalds /* We can reschedule between buckets: */ 20921da177e4SLinus Torvalds cond_resched_softirq(); 20931da177e4SLinus Torvalds 20946e04e021SArnaldo Carvalho de Melo if (++st->bucket < tcp_hashinfo.ehash_size) { 20956e04e021SArnaldo Carvalho de Melo read_lock(&tcp_hashinfo.ehash[st->bucket].lock); 20966e04e021SArnaldo Carvalho de Melo sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain); 20971da177e4SLinus Torvalds } else { 20981da177e4SLinus Torvalds cur = NULL; 20991da177e4SLinus Torvalds goto out; 21001da177e4SLinus Torvalds } 21011da177e4SLinus Torvalds } else 21021da177e4SLinus Torvalds sk = sk_next(sk); 21031da177e4SLinus Torvalds 21041da177e4SLinus Torvalds sk_for_each_from(sk, node) { 21051da177e4SLinus Torvalds if (sk->sk_family == st->family) 21061da177e4SLinus Torvalds goto found; 21071da177e4SLinus Torvalds } 21081da177e4SLinus Torvalds 21091da177e4SLinus Torvalds st->state = TCP_SEQ_STATE_TIME_WAIT; 2110dbca9b27SEric Dumazet tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain); 21111da177e4SLinus Torvalds goto get_tw; 21121da177e4SLinus Torvalds found: 21131da177e4SLinus Torvalds cur = sk; 21141da177e4SLinus Torvalds out: 21151da177e4SLinus Torvalds return cur; 21161da177e4SLinus Torvalds } 21171da177e4SLinus Torvalds 21181da177e4SLinus Torvalds static void *established_get_idx(struct seq_file *seq, loff_t pos) 21191da177e4SLinus Torvalds { 21201da177e4SLinus Torvalds void *rc = established_get_first(seq); 21211da177e4SLinus Torvalds 21221da177e4SLinus Torvalds while (rc && pos) { 21231da177e4SLinus Torvalds rc = established_get_next(seq, rc); 21241da177e4SLinus Torvalds --pos; 21251da177e4SLinus Torvalds } 21261da177e4SLinus Torvalds return rc; 21271da177e4SLinus Torvalds } 21281da177e4SLinus Torvalds 21291da177e4SLinus Torvalds static void *tcp_get_idx(struct seq_file *seq, loff_t pos) 21301da177e4SLinus Torvalds { 21311da177e4SLinus Torvalds void *rc; 21321da177e4SLinus Torvalds struct tcp_iter_state* st = seq->private; 21331da177e4SLinus Torvalds 2134f3f05f70SArnaldo Carvalho de Melo inet_listen_lock(&tcp_hashinfo); 21351da177e4SLinus Torvalds st->state = TCP_SEQ_STATE_LISTENING; 21361da177e4SLinus Torvalds rc = listening_get_idx(seq, &pos); 21371da177e4SLinus Torvalds 21381da177e4SLinus Torvalds if (!rc) { 2139f3f05f70SArnaldo Carvalho de Melo inet_listen_unlock(&tcp_hashinfo); 21401da177e4SLinus Torvalds local_bh_disable(); 21411da177e4SLinus Torvalds st->state = TCP_SEQ_STATE_ESTABLISHED; 21421da177e4SLinus Torvalds rc = established_get_idx(seq, pos); 21431da177e4SLinus Torvalds } 21441da177e4SLinus Torvalds 21451da177e4SLinus Torvalds return rc; 21461da177e4SLinus Torvalds } 21471da177e4SLinus Torvalds 21481da177e4SLinus Torvalds static void *tcp_seq_start(struct seq_file *seq, loff_t *pos) 21491da177e4SLinus Torvalds { 21501da177e4SLinus Torvalds struct tcp_iter_state* st = seq->private; 21511da177e4SLinus Torvalds st->state = TCP_SEQ_STATE_LISTENING; 21521da177e4SLinus Torvalds st->num = 0; 21531da177e4SLinus Torvalds return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; 21541da177e4SLinus Torvalds } 21551da177e4SLinus Torvalds 21561da177e4SLinus Torvalds static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) 21571da177e4SLinus Torvalds { 21581da177e4SLinus Torvalds void *rc = NULL; 21591da177e4SLinus Torvalds struct tcp_iter_state* st; 21601da177e4SLinus Torvalds 21611da177e4SLinus Torvalds if (v == SEQ_START_TOKEN) { 21621da177e4SLinus Torvalds rc = tcp_get_idx(seq, 0); 21631da177e4SLinus Torvalds goto out; 21641da177e4SLinus Torvalds } 21651da177e4SLinus Torvalds st = seq->private; 21661da177e4SLinus Torvalds 21671da177e4SLinus Torvalds switch (st->state) { 21681da177e4SLinus Torvalds case TCP_SEQ_STATE_OPENREQ: 21691da177e4SLinus Torvalds case TCP_SEQ_STATE_LISTENING: 21701da177e4SLinus Torvalds rc = listening_get_next(seq, v); 21711da177e4SLinus Torvalds if (!rc) { 2172f3f05f70SArnaldo Carvalho de Melo inet_listen_unlock(&tcp_hashinfo); 21731da177e4SLinus Torvalds local_bh_disable(); 21741da177e4SLinus Torvalds st->state = TCP_SEQ_STATE_ESTABLISHED; 21751da177e4SLinus Torvalds rc = established_get_first(seq); 21761da177e4SLinus Torvalds } 21771da177e4SLinus Torvalds break; 21781da177e4SLinus Torvalds case TCP_SEQ_STATE_ESTABLISHED: 21791da177e4SLinus Torvalds case TCP_SEQ_STATE_TIME_WAIT: 21801da177e4SLinus Torvalds rc = established_get_next(seq, v); 21811da177e4SLinus Torvalds break; 21821da177e4SLinus Torvalds } 21831da177e4SLinus Torvalds out: 21841da177e4SLinus Torvalds ++*pos; 21851da177e4SLinus Torvalds return rc; 21861da177e4SLinus Torvalds } 21871da177e4SLinus Torvalds 21881da177e4SLinus Torvalds static void tcp_seq_stop(struct seq_file *seq, void *v) 21891da177e4SLinus Torvalds { 21901da177e4SLinus Torvalds struct tcp_iter_state* st = seq->private; 21911da177e4SLinus Torvalds 21921da177e4SLinus Torvalds switch (st->state) { 21931da177e4SLinus Torvalds case TCP_SEQ_STATE_OPENREQ: 21941da177e4SLinus Torvalds if (v) { 2195463c84b9SArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk); 2196463c84b9SArnaldo Carvalho de Melo read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); 21971da177e4SLinus Torvalds } 21981da177e4SLinus Torvalds case TCP_SEQ_STATE_LISTENING: 21991da177e4SLinus Torvalds if (v != SEQ_START_TOKEN) 2200f3f05f70SArnaldo Carvalho de Melo inet_listen_unlock(&tcp_hashinfo); 22011da177e4SLinus Torvalds break; 22021da177e4SLinus Torvalds case TCP_SEQ_STATE_TIME_WAIT: 22031da177e4SLinus Torvalds case TCP_SEQ_STATE_ESTABLISHED: 22041da177e4SLinus Torvalds if (v) 22056e04e021SArnaldo Carvalho de Melo read_unlock(&tcp_hashinfo.ehash[st->bucket].lock); 22061da177e4SLinus Torvalds local_bh_enable(); 22071da177e4SLinus Torvalds break; 22081da177e4SLinus Torvalds } 22091da177e4SLinus Torvalds } 22101da177e4SLinus Torvalds 22111da177e4SLinus Torvalds static int tcp_seq_open(struct inode *inode, struct file *file) 22121da177e4SLinus Torvalds { 22131da177e4SLinus Torvalds struct tcp_seq_afinfo *afinfo = PDE(inode)->data; 22141da177e4SLinus Torvalds struct seq_file *seq; 22151da177e4SLinus Torvalds struct tcp_iter_state *s; 22161da177e4SLinus Torvalds int rc; 22171da177e4SLinus Torvalds 22181da177e4SLinus Torvalds if (unlikely(afinfo == NULL)) 22191da177e4SLinus Torvalds return -EINVAL; 22201da177e4SLinus Torvalds 22210da974f4SPanagiotis Issaris s = kzalloc(sizeof(*s), GFP_KERNEL); 22221da177e4SLinus Torvalds if (!s) 22231da177e4SLinus Torvalds return -ENOMEM; 22241da177e4SLinus Torvalds s->family = afinfo->family; 22251da177e4SLinus Torvalds s->seq_ops.start = tcp_seq_start; 22261da177e4SLinus Torvalds s->seq_ops.next = tcp_seq_next; 22271da177e4SLinus Torvalds s->seq_ops.show = afinfo->seq_show; 22281da177e4SLinus Torvalds s->seq_ops.stop = tcp_seq_stop; 22291da177e4SLinus Torvalds 22301da177e4SLinus Torvalds rc = seq_open(file, &s->seq_ops); 22311da177e4SLinus Torvalds if (rc) 22321da177e4SLinus Torvalds goto out_kfree; 22331da177e4SLinus Torvalds seq = file->private_data; 22341da177e4SLinus Torvalds seq->private = s; 22351da177e4SLinus Torvalds out: 22361da177e4SLinus Torvalds return rc; 22371da177e4SLinus Torvalds out_kfree: 22381da177e4SLinus Torvalds kfree(s); 22391da177e4SLinus Torvalds goto out; 22401da177e4SLinus Torvalds } 22411da177e4SLinus Torvalds 22421da177e4SLinus Torvalds int tcp_proc_register(struct tcp_seq_afinfo *afinfo) 22431da177e4SLinus Torvalds { 22441da177e4SLinus Torvalds int rc = 0; 22451da177e4SLinus Torvalds struct proc_dir_entry *p; 22461da177e4SLinus Torvalds 22471da177e4SLinus Torvalds if (!afinfo) 22481da177e4SLinus Torvalds return -EINVAL; 22491da177e4SLinus Torvalds afinfo->seq_fops->owner = afinfo->owner; 22501da177e4SLinus Torvalds afinfo->seq_fops->open = tcp_seq_open; 22511da177e4SLinus Torvalds afinfo->seq_fops->read = seq_read; 22521da177e4SLinus Torvalds afinfo->seq_fops->llseek = seq_lseek; 22531da177e4SLinus Torvalds afinfo->seq_fops->release = seq_release_private; 22541da177e4SLinus Torvalds 22551da177e4SLinus Torvalds p = proc_net_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops); 22561da177e4SLinus Torvalds if (p) 22571da177e4SLinus Torvalds p->data = afinfo; 22581da177e4SLinus Torvalds else 22591da177e4SLinus Torvalds rc = -ENOMEM; 22601da177e4SLinus Torvalds return rc; 22611da177e4SLinus Torvalds } 22621da177e4SLinus Torvalds 22631da177e4SLinus Torvalds void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo) 22641da177e4SLinus Torvalds { 22651da177e4SLinus Torvalds if (!afinfo) 22661da177e4SLinus Torvalds return; 22671da177e4SLinus Torvalds proc_net_remove(afinfo->name); 22681da177e4SLinus Torvalds memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops)); 22691da177e4SLinus Torvalds } 22701da177e4SLinus Torvalds 227160236fddSArnaldo Carvalho de Melo static void get_openreq4(struct sock *sk, struct request_sock *req, 22721da177e4SLinus Torvalds char *tmpbuf, int i, int uid) 22731da177e4SLinus Torvalds { 22742e6599cbSArnaldo Carvalho de Melo const struct inet_request_sock *ireq = inet_rsk(req); 22751da177e4SLinus Torvalds int ttd = req->expires - jiffies; 22761da177e4SLinus Torvalds 22771da177e4SLinus Torvalds sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X" 22781da177e4SLinus Torvalds " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p", 22791da177e4SLinus Torvalds i, 22802e6599cbSArnaldo Carvalho de Melo ireq->loc_addr, 22811da177e4SLinus Torvalds ntohs(inet_sk(sk)->sport), 22822e6599cbSArnaldo Carvalho de Melo ireq->rmt_addr, 22832e6599cbSArnaldo Carvalho de Melo ntohs(ireq->rmt_port), 22841da177e4SLinus Torvalds TCP_SYN_RECV, 22851da177e4SLinus Torvalds 0, 0, /* could print option size, but that is af dependent. */ 22861da177e4SLinus Torvalds 1, /* timers active (only the expire timer) */ 22871da177e4SLinus Torvalds jiffies_to_clock_t(ttd), 22881da177e4SLinus Torvalds req->retrans, 22891da177e4SLinus Torvalds uid, 22901da177e4SLinus Torvalds 0, /* non standard timer */ 22911da177e4SLinus Torvalds 0, /* open_requests have no inode */ 22921da177e4SLinus Torvalds atomic_read(&sk->sk_refcnt), 22931da177e4SLinus Torvalds req); 22941da177e4SLinus Torvalds } 22951da177e4SLinus Torvalds 2296cf4c6bf8SIlpo Järvinen static void get_tcp4_sock(struct sock *sk, char *tmpbuf, int i) 22971da177e4SLinus Torvalds { 22981da177e4SLinus Torvalds int timer_active; 22991da177e4SLinus Torvalds unsigned long timer_expires; 2300cf4c6bf8SIlpo Järvinen struct tcp_sock *tp = tcp_sk(sk); 2301cf4c6bf8SIlpo Järvinen const struct inet_connection_sock *icsk = inet_csk(sk); 2302cf4c6bf8SIlpo Järvinen struct inet_sock *inet = inet_sk(sk); 2303714e85beSAl Viro __be32 dest = inet->daddr; 2304714e85beSAl Viro __be32 src = inet->rcv_saddr; 23051da177e4SLinus Torvalds __u16 destp = ntohs(inet->dport); 23061da177e4SLinus Torvalds __u16 srcp = ntohs(inet->sport); 23071da177e4SLinus Torvalds 2308463c84b9SArnaldo Carvalho de Melo if (icsk->icsk_pending == ICSK_TIME_RETRANS) { 23091da177e4SLinus Torvalds timer_active = 1; 2310463c84b9SArnaldo Carvalho de Melo timer_expires = icsk->icsk_timeout; 2311463c84b9SArnaldo Carvalho de Melo } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { 23121da177e4SLinus Torvalds timer_active = 4; 2313463c84b9SArnaldo Carvalho de Melo timer_expires = icsk->icsk_timeout; 2314cf4c6bf8SIlpo Järvinen } else if (timer_pending(&sk->sk_timer)) { 23151da177e4SLinus Torvalds timer_active = 2; 2316cf4c6bf8SIlpo Järvinen timer_expires = sk->sk_timer.expires; 23171da177e4SLinus Torvalds } else { 23181da177e4SLinus Torvalds timer_active = 0; 23191da177e4SLinus Torvalds timer_expires = jiffies; 23201da177e4SLinus Torvalds } 23211da177e4SLinus Torvalds 23221da177e4SLinus Torvalds sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX " 23231da177e4SLinus Torvalds "%08X %5d %8d %lu %d %p %u %u %u %u %d", 2324cf4c6bf8SIlpo Järvinen i, src, srcp, dest, destp, sk->sk_state, 232547da8ee6SSridhar Samudrala tp->write_seq - tp->snd_una, 2326cf4c6bf8SIlpo Järvinen sk->sk_state == TCP_LISTEN ? sk->sk_ack_backlog : 23277174259eSArnaldo Carvalho de Melo (tp->rcv_nxt - tp->copied_seq), 23281da177e4SLinus Torvalds timer_active, 23291da177e4SLinus Torvalds jiffies_to_clock_t(timer_expires - jiffies), 2330463c84b9SArnaldo Carvalho de Melo icsk->icsk_retransmits, 2331cf4c6bf8SIlpo Järvinen sock_i_uid(sk), 23326687e988SArnaldo Carvalho de Melo icsk->icsk_probes_out, 2333cf4c6bf8SIlpo Järvinen sock_i_ino(sk), 2334cf4c6bf8SIlpo Järvinen atomic_read(&sk->sk_refcnt), sk, 2335463c84b9SArnaldo Carvalho de Melo icsk->icsk_rto, 2336463c84b9SArnaldo Carvalho de Melo icsk->icsk_ack.ato, 2337463c84b9SArnaldo Carvalho de Melo (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, 23381da177e4SLinus Torvalds tp->snd_cwnd, 23391da177e4SLinus Torvalds tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh); 23401da177e4SLinus Torvalds } 23411da177e4SLinus Torvalds 23427174259eSArnaldo Carvalho de Melo static void get_timewait4_sock(struct inet_timewait_sock *tw, 23437174259eSArnaldo Carvalho de Melo char *tmpbuf, int i) 23441da177e4SLinus Torvalds { 234523f33c2dSAl Viro __be32 dest, src; 23461da177e4SLinus Torvalds __u16 destp, srcp; 23471da177e4SLinus Torvalds int ttd = tw->tw_ttd - jiffies; 23481da177e4SLinus Torvalds 23491da177e4SLinus Torvalds if (ttd < 0) 23501da177e4SLinus Torvalds ttd = 0; 23511da177e4SLinus Torvalds 23521da177e4SLinus Torvalds dest = tw->tw_daddr; 23531da177e4SLinus Torvalds src = tw->tw_rcv_saddr; 23541da177e4SLinus Torvalds destp = ntohs(tw->tw_dport); 23551da177e4SLinus Torvalds srcp = ntohs(tw->tw_sport); 23561da177e4SLinus Torvalds 23571da177e4SLinus Torvalds sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X" 23581da177e4SLinus Torvalds " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p", 23591da177e4SLinus Torvalds i, src, srcp, dest, destp, tw->tw_substate, 0, 0, 23601da177e4SLinus Torvalds 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0, 23611da177e4SLinus Torvalds atomic_read(&tw->tw_refcnt), tw); 23621da177e4SLinus Torvalds } 23631da177e4SLinus Torvalds 23641da177e4SLinus Torvalds #define TMPSZ 150 23651da177e4SLinus Torvalds 23661da177e4SLinus Torvalds static int tcp4_seq_show(struct seq_file *seq, void *v) 23671da177e4SLinus Torvalds { 23681da177e4SLinus Torvalds struct tcp_iter_state* st; 23691da177e4SLinus Torvalds char tmpbuf[TMPSZ + 1]; 23701da177e4SLinus Torvalds 23711da177e4SLinus Torvalds if (v == SEQ_START_TOKEN) { 23721da177e4SLinus Torvalds seq_printf(seq, "%-*s\n", TMPSZ - 1, 23731da177e4SLinus Torvalds " sl local_address rem_address st tx_queue " 23741da177e4SLinus Torvalds "rx_queue tr tm->when retrnsmt uid timeout " 23751da177e4SLinus Torvalds "inode"); 23761da177e4SLinus Torvalds goto out; 23771da177e4SLinus Torvalds } 23781da177e4SLinus Torvalds st = seq->private; 23791da177e4SLinus Torvalds 23801da177e4SLinus Torvalds switch (st->state) { 23811da177e4SLinus Torvalds case TCP_SEQ_STATE_LISTENING: 23821da177e4SLinus Torvalds case TCP_SEQ_STATE_ESTABLISHED: 23831da177e4SLinus Torvalds get_tcp4_sock(v, tmpbuf, st->num); 23841da177e4SLinus Torvalds break; 23851da177e4SLinus Torvalds case TCP_SEQ_STATE_OPENREQ: 23861da177e4SLinus Torvalds get_openreq4(st->syn_wait_sk, v, tmpbuf, st->num, st->uid); 23871da177e4SLinus Torvalds break; 23881da177e4SLinus Torvalds case TCP_SEQ_STATE_TIME_WAIT: 23891da177e4SLinus Torvalds get_timewait4_sock(v, tmpbuf, st->num); 23901da177e4SLinus Torvalds break; 23911da177e4SLinus Torvalds } 23921da177e4SLinus Torvalds seq_printf(seq, "%-*s\n", TMPSZ - 1, tmpbuf); 23931da177e4SLinus Torvalds out: 23941da177e4SLinus Torvalds return 0; 23951da177e4SLinus Torvalds } 23961da177e4SLinus Torvalds 23971da177e4SLinus Torvalds static struct file_operations tcp4_seq_fops; 23981da177e4SLinus Torvalds static struct tcp_seq_afinfo tcp4_seq_afinfo = { 23991da177e4SLinus Torvalds .owner = THIS_MODULE, 24001da177e4SLinus Torvalds .name = "tcp", 24011da177e4SLinus Torvalds .family = AF_INET, 24021da177e4SLinus Torvalds .seq_show = tcp4_seq_show, 24031da177e4SLinus Torvalds .seq_fops = &tcp4_seq_fops, 24041da177e4SLinus Torvalds }; 24051da177e4SLinus Torvalds 24061da177e4SLinus Torvalds int __init tcp4_proc_init(void) 24071da177e4SLinus Torvalds { 24081da177e4SLinus Torvalds return tcp_proc_register(&tcp4_seq_afinfo); 24091da177e4SLinus Torvalds } 24101da177e4SLinus Torvalds 24111da177e4SLinus Torvalds void tcp4_proc_exit(void) 24121da177e4SLinus Torvalds { 24131da177e4SLinus Torvalds tcp_proc_unregister(&tcp4_seq_afinfo); 24141da177e4SLinus Torvalds } 24151da177e4SLinus Torvalds #endif /* CONFIG_PROC_FS */ 24161da177e4SLinus Torvalds 24171da177e4SLinus Torvalds struct proto tcp_prot = { 24181da177e4SLinus Torvalds .name = "TCP", 24191da177e4SLinus Torvalds .owner = THIS_MODULE, 24201da177e4SLinus Torvalds .close = tcp_close, 24211da177e4SLinus Torvalds .connect = tcp_v4_connect, 24221da177e4SLinus Torvalds .disconnect = tcp_disconnect, 2423463c84b9SArnaldo Carvalho de Melo .accept = inet_csk_accept, 24241da177e4SLinus Torvalds .ioctl = tcp_ioctl, 24251da177e4SLinus Torvalds .init = tcp_v4_init_sock, 24261da177e4SLinus Torvalds .destroy = tcp_v4_destroy_sock, 24271da177e4SLinus Torvalds .shutdown = tcp_shutdown, 24281da177e4SLinus Torvalds .setsockopt = tcp_setsockopt, 24291da177e4SLinus Torvalds .getsockopt = tcp_getsockopt, 24301da177e4SLinus Torvalds .sendmsg = tcp_sendmsg, 24311da177e4SLinus Torvalds .recvmsg = tcp_recvmsg, 24321da177e4SLinus Torvalds .backlog_rcv = tcp_v4_do_rcv, 24331da177e4SLinus Torvalds .hash = tcp_v4_hash, 24341da177e4SLinus Torvalds .unhash = tcp_unhash, 24351da177e4SLinus Torvalds .get_port = tcp_v4_get_port, 24361da177e4SLinus Torvalds .enter_memory_pressure = tcp_enter_memory_pressure, 24371da177e4SLinus Torvalds .sockets_allocated = &tcp_sockets_allocated, 24380a5578cfSArnaldo Carvalho de Melo .orphan_count = &tcp_orphan_count, 24391da177e4SLinus Torvalds .memory_allocated = &tcp_memory_allocated, 24401da177e4SLinus Torvalds .memory_pressure = &tcp_memory_pressure, 24411da177e4SLinus Torvalds .sysctl_mem = sysctl_tcp_mem, 24421da177e4SLinus Torvalds .sysctl_wmem = sysctl_tcp_wmem, 24431da177e4SLinus Torvalds .sysctl_rmem = sysctl_tcp_rmem, 24441da177e4SLinus Torvalds .max_header = MAX_TCP_HEADER, 24451da177e4SLinus Torvalds .obj_size = sizeof(struct tcp_sock), 24466d6ee43eSArnaldo Carvalho de Melo .twsk_prot = &tcp_timewait_sock_ops, 244760236fddSArnaldo Carvalho de Melo .rsk_prot = &tcp_request_sock_ops, 2448543d9cfeSArnaldo Carvalho de Melo #ifdef CONFIG_COMPAT 2449543d9cfeSArnaldo Carvalho de Melo .compat_setsockopt = compat_tcp_setsockopt, 2450543d9cfeSArnaldo Carvalho de Melo .compat_getsockopt = compat_tcp_getsockopt, 2451543d9cfeSArnaldo Carvalho de Melo #endif 24521da177e4SLinus Torvalds }; 24531da177e4SLinus Torvalds 24541da177e4SLinus Torvalds void __init tcp_v4_init(struct net_proto_family *ops) 24551da177e4SLinus Torvalds { 24567174259eSArnaldo Carvalho de Melo if (inet_csk_ctl_sock_create(&tcp_socket, PF_INET, SOCK_RAW, 24577174259eSArnaldo Carvalho de Melo IPPROTO_TCP) < 0) 24581da177e4SLinus Torvalds panic("Failed to create the TCP control socket.\n"); 24591da177e4SLinus Torvalds } 24601da177e4SLinus Torvalds 24611da177e4SLinus Torvalds EXPORT_SYMBOL(ipv4_specific); 24621da177e4SLinus Torvalds EXPORT_SYMBOL(tcp_hashinfo); 24631da177e4SLinus Torvalds EXPORT_SYMBOL(tcp_prot); 24641da177e4SLinus Torvalds EXPORT_SYMBOL(tcp_unhash); 24651da177e4SLinus Torvalds EXPORT_SYMBOL(tcp_v4_conn_request); 24661da177e4SLinus Torvalds EXPORT_SYMBOL(tcp_v4_connect); 24671da177e4SLinus Torvalds EXPORT_SYMBOL(tcp_v4_do_rcv); 24681da177e4SLinus Torvalds EXPORT_SYMBOL(tcp_v4_remember_stamp); 24691da177e4SLinus Torvalds EXPORT_SYMBOL(tcp_v4_send_check); 24701da177e4SLinus Torvalds EXPORT_SYMBOL(tcp_v4_syn_recv_sock); 24711da177e4SLinus Torvalds 24721da177e4SLinus Torvalds #ifdef CONFIG_PROC_FS 24731da177e4SLinus Torvalds EXPORT_SYMBOL(tcp_proc_register); 24741da177e4SLinus Torvalds EXPORT_SYMBOL(tcp_proc_unregister); 24751da177e4SLinus Torvalds #endif 24761da177e4SLinus Torvalds EXPORT_SYMBOL(sysctl_local_port_range); 24771da177e4SLinus Torvalds EXPORT_SYMBOL(sysctl_tcp_low_latency); 24781da177e4SLinus Torvalds 2479