187270762SStephen Hemminger /* 2b7d7a9e3SLuca De Cicco * TCP Westwood+: end-to-end bandwidth estimation for TCP 387270762SStephen Hemminger * 4b7d7a9e3SLuca De Cicco * Angelo Dell'Aera: author of the first version of TCP Westwood+ in Linux 2.4 5b7d7a9e3SLuca De Cicco * 6b7d7a9e3SLuca De Cicco * Support at http://c3lab.poliba.it/index.php/Westwood 7b7d7a9e3SLuca De Cicco * Main references in literature: 8b7d7a9e3SLuca De Cicco * 9b7d7a9e3SLuca De Cicco * - Mascolo S, Casetti, M. Gerla et al. 10b7d7a9e3SLuca De Cicco * "TCP Westwood: bandwidth estimation for TCP" Proc. ACM Mobicom 2001 11b7d7a9e3SLuca De Cicco * 12b7d7a9e3SLuca De Cicco * - A. Grieco, s. Mascolo 13b7d7a9e3SLuca De Cicco * "Performance evaluation of New Reno, Vegas, Westwood+ TCP" ACM Computer 14b7d7a9e3SLuca De Cicco * Comm. Review, 2004 15b7d7a9e3SLuca De Cicco * 16b7d7a9e3SLuca De Cicco * - A. Dell'Aera, L. Grieco, S. Mascolo. 17b7d7a9e3SLuca De Cicco * "Linux 2.4 Implementation of Westwood+ TCP with Rate-Halving : 18b7d7a9e3SLuca De Cicco * A Performance Evaluation Over the Internet" (ICC 2004), Paris, June 2004 19b7d7a9e3SLuca De Cicco * 20b7d7a9e3SLuca De Cicco * Westwood+ employs end-to-end bandwidth measurement to set cwnd and 21b7d7a9e3SLuca De Cicco * ssthresh after packet loss. The probing phase is as the original Reno. 2287270762SStephen Hemminger */ 2387270762SStephen Hemminger 2487270762SStephen Hemminger #include <linux/config.h> 2587270762SStephen Hemminger #include <linux/mm.h> 2687270762SStephen Hemminger #include <linux/module.h> 2787270762SStephen Hemminger #include <linux/skbuff.h> 28a8c2190eSArnaldo Carvalho de Melo #include <linux/inet_diag.h> 2987270762SStephen Hemminger #include <net/tcp.h> 3087270762SStephen Hemminger 3187270762SStephen Hemminger /* TCP Westwood structure */ 3287270762SStephen Hemminger struct westwood { 3387270762SStephen Hemminger u32 bw_ns_est; /* first bandwidth estimation..not too smoothed 8) */ 3487270762SStephen Hemminger u32 bw_est; /* bandwidth estimate */ 3587270762SStephen Hemminger u32 rtt_win_sx; /* here starts a new evaluation... */ 3687270762SStephen Hemminger u32 bk; 3787270762SStephen Hemminger u32 snd_una; /* used for evaluating the number of acked bytes */ 3887270762SStephen Hemminger u32 cumul_ack; 3987270762SStephen Hemminger u32 accounted; 4087270762SStephen Hemminger u32 rtt; 4187270762SStephen Hemminger u32 rtt_min; /* minimum observed RTT */ 42f61e2901SStephen Hemminger u8 first_ack; /* flag which infers that this is the first ack */ 4387270762SStephen Hemminger }; 4487270762SStephen Hemminger 4587270762SStephen Hemminger 4687270762SStephen Hemminger /* TCP Westwood functions and constants */ 4787270762SStephen Hemminger #define TCP_WESTWOOD_RTT_MIN (HZ/20) /* 50ms */ 4887270762SStephen Hemminger #define TCP_WESTWOOD_INIT_RTT (20*HZ) /* maybe too conservative?! */ 4987270762SStephen Hemminger 5087270762SStephen Hemminger /* 5187270762SStephen Hemminger * @tcp_westwood_create 5287270762SStephen Hemminger * This function initializes fields used in TCP Westwood+, 5387270762SStephen Hemminger * it is called after the initial SYN, so the sequence numbers 5487270762SStephen Hemminger * are correct but new passive connections we have no 5587270762SStephen Hemminger * information about RTTmin at this time so we simply set it to 5687270762SStephen Hemminger * TCP_WESTWOOD_INIT_RTT. This value was chosen to be too conservative 5787270762SStephen Hemminger * since in this way we're sure it will be updated in a consistent 5887270762SStephen Hemminger * way as soon as possible. It will reasonably happen within the first 5987270762SStephen Hemminger * RTT period of the connection lifetime. 6087270762SStephen Hemminger */ 616687e988SArnaldo Carvalho de Melo static void tcp_westwood_init(struct sock *sk) 6287270762SStephen Hemminger { 636687e988SArnaldo Carvalho de Melo struct westwood *w = inet_csk_ca(sk); 6487270762SStephen Hemminger 6587270762SStephen Hemminger w->bk = 0; 6687270762SStephen Hemminger w->bw_ns_est = 0; 6787270762SStephen Hemminger w->bw_est = 0; 6887270762SStephen Hemminger w->accounted = 0; 6987270762SStephen Hemminger w->cumul_ack = 0; 7087270762SStephen Hemminger w->rtt_min = w->rtt = TCP_WESTWOOD_INIT_RTT; 7187270762SStephen Hemminger w->rtt_win_sx = tcp_time_stamp; 726687e988SArnaldo Carvalho de Melo w->snd_una = tcp_sk(sk)->snd_una; 73f61e2901SStephen Hemminger w->first_ack = 1; 7487270762SStephen Hemminger } 7587270762SStephen Hemminger 7687270762SStephen Hemminger /* 7787270762SStephen Hemminger * @westwood_do_filter 7887270762SStephen Hemminger * Low-pass filter. Implemented using constant coefficients. 7987270762SStephen Hemminger */ 8087270762SStephen Hemminger static inline u32 westwood_do_filter(u32 a, u32 b) 8187270762SStephen Hemminger { 8287270762SStephen Hemminger return (((7 * a) + b) >> 3); 8387270762SStephen Hemminger } 8487270762SStephen Hemminger 85*b3a92eabSLuca De Cicco static void westwood_filter(struct westwood *w, u32 delta) 8687270762SStephen Hemminger { 87*b3a92eabSLuca De Cicco /* If the filter is empty fill it with the first sample of bandwidth */ 88*b3a92eabSLuca De Cicco if (w->bw_ns_est == 0 && w->bw_est == 0) { 89*b3a92eabSLuca De Cicco w->bw_ns_est = w->bk / delta; 90*b3a92eabSLuca De Cicco w->bw_est = w->bw_ns_est; 91*b3a92eabSLuca De Cicco } else { 9287270762SStephen Hemminger w->bw_ns_est = westwood_do_filter(w->bw_ns_est, w->bk / delta); 9387270762SStephen Hemminger w->bw_est = westwood_do_filter(w->bw_est, w->bw_ns_est); 9487270762SStephen Hemminger } 95*b3a92eabSLuca De Cicco } 9687270762SStephen Hemminger 9787270762SStephen Hemminger /* 9887270762SStephen Hemminger * @westwood_pkts_acked 9987270762SStephen Hemminger * Called after processing group of packets. 10087270762SStephen Hemminger * but all westwood needs is the last sample of srtt. 10187270762SStephen Hemminger */ 1026687e988SArnaldo Carvalho de Melo static void tcp_westwood_pkts_acked(struct sock *sk, u32 cnt) 10387270762SStephen Hemminger { 1046687e988SArnaldo Carvalho de Melo struct westwood *w = inet_csk_ca(sk); 10587270762SStephen Hemminger if (cnt > 0) 1066687e988SArnaldo Carvalho de Melo w->rtt = tcp_sk(sk)->srtt >> 3; 10787270762SStephen Hemminger } 10887270762SStephen Hemminger 10987270762SStephen Hemminger /* 11087270762SStephen Hemminger * @westwood_update_window 11187270762SStephen Hemminger * It updates RTT evaluation window if it is the right moment to do 11287270762SStephen Hemminger * it. If so it calls filter for evaluating bandwidth. 11387270762SStephen Hemminger */ 1146687e988SArnaldo Carvalho de Melo static void westwood_update_window(struct sock *sk) 11587270762SStephen Hemminger { 1166687e988SArnaldo Carvalho de Melo struct westwood *w = inet_csk_ca(sk); 11787270762SStephen Hemminger s32 delta = tcp_time_stamp - w->rtt_win_sx; 11887270762SStephen Hemminger 119b7d7a9e3SLuca De Cicco /* Initialize w->snd_una with the first acked sequence number in order 120f61e2901SStephen Hemminger * to fix mismatch between tp->snd_una and w->snd_una for the first 121f61e2901SStephen Hemminger * bandwidth sample 122f61e2901SStephen Hemminger */ 123f61e2901SStephen Hemminger if (w->first_ack) { 124f61e2901SStephen Hemminger w->snd_una = tcp_sk(sk)->snd_una; 125f61e2901SStephen Hemminger w->first_ack = 0; 126f61e2901SStephen Hemminger } 127f61e2901SStephen Hemminger 12887270762SStephen Hemminger /* 12987270762SStephen Hemminger * See if a RTT-window has passed. 13087270762SStephen Hemminger * Be careful since if RTT is less than 13187270762SStephen Hemminger * 50ms we don't filter but we continue 'building the sample'. 13287270762SStephen Hemminger * This minimum limit was chosen since an estimation on small 13387270762SStephen Hemminger * time intervals is better to avoid... 13487270762SStephen Hemminger * Obviously on a LAN we reasonably will always have 13587270762SStephen Hemminger * right_bound = left_bound + WESTWOOD_RTT_MIN 13687270762SStephen Hemminger */ 13787270762SStephen Hemminger if (w->rtt && delta > max_t(u32, w->rtt, TCP_WESTWOOD_RTT_MIN)) { 13887270762SStephen Hemminger westwood_filter(w, delta); 13987270762SStephen Hemminger 14087270762SStephen Hemminger w->bk = 0; 14187270762SStephen Hemminger w->rtt_win_sx = tcp_time_stamp; 14287270762SStephen Hemminger } 14387270762SStephen Hemminger } 14487270762SStephen Hemminger 14587270762SStephen Hemminger /* 14687270762SStephen Hemminger * @westwood_fast_bw 14787270762SStephen Hemminger * It is called when we are in fast path. In particular it is called when 14887270762SStephen Hemminger * header prediction is successful. In such case in fact update is 14987270762SStephen Hemminger * straight forward and doesn't need any particular care. 15087270762SStephen Hemminger */ 1516687e988SArnaldo Carvalho de Melo static inline void westwood_fast_bw(struct sock *sk) 15287270762SStephen Hemminger { 1536687e988SArnaldo Carvalho de Melo const struct tcp_sock *tp = tcp_sk(sk); 1546687e988SArnaldo Carvalho de Melo struct westwood *w = inet_csk_ca(sk); 15587270762SStephen Hemminger 1566687e988SArnaldo Carvalho de Melo westwood_update_window(sk); 15787270762SStephen Hemminger 15887270762SStephen Hemminger w->bk += tp->snd_una - w->snd_una; 15987270762SStephen Hemminger w->snd_una = tp->snd_una; 16087270762SStephen Hemminger w->rtt_min = min(w->rtt, w->rtt_min); 16187270762SStephen Hemminger } 16287270762SStephen Hemminger 16387270762SStephen Hemminger /* 16487270762SStephen Hemminger * @westwood_acked_count 16587270762SStephen Hemminger * This function evaluates cumul_ack for evaluating bk in case of 16687270762SStephen Hemminger * delayed or partial acks. 16787270762SStephen Hemminger */ 1686687e988SArnaldo Carvalho de Melo static inline u32 westwood_acked_count(struct sock *sk) 16987270762SStephen Hemminger { 1706687e988SArnaldo Carvalho de Melo const struct tcp_sock *tp = tcp_sk(sk); 1716687e988SArnaldo Carvalho de Melo struct westwood *w = inet_csk_ca(sk); 17287270762SStephen Hemminger 17387270762SStephen Hemminger w->cumul_ack = tp->snd_una - w->snd_una; 17487270762SStephen Hemminger 17587270762SStephen Hemminger /* If cumul_ack is 0 this is a dupack since it's not moving 17687270762SStephen Hemminger * tp->snd_una. 17787270762SStephen Hemminger */ 17887270762SStephen Hemminger if (!w->cumul_ack) { 17987270762SStephen Hemminger w->accounted += tp->mss_cache; 18087270762SStephen Hemminger w->cumul_ack = tp->mss_cache; 18187270762SStephen Hemminger } 18287270762SStephen Hemminger 18387270762SStephen Hemminger if (w->cumul_ack > tp->mss_cache) { 18487270762SStephen Hemminger /* Partial or delayed ack */ 18587270762SStephen Hemminger if (w->accounted >= w->cumul_ack) { 18687270762SStephen Hemminger w->accounted -= w->cumul_ack; 18787270762SStephen Hemminger w->cumul_ack = tp->mss_cache; 18887270762SStephen Hemminger } else { 18987270762SStephen Hemminger w->cumul_ack -= w->accounted; 19087270762SStephen Hemminger w->accounted = 0; 19187270762SStephen Hemminger } 19287270762SStephen Hemminger } 19387270762SStephen Hemminger 19487270762SStephen Hemminger w->snd_una = tp->snd_una; 19587270762SStephen Hemminger 19687270762SStephen Hemminger return w->cumul_ack; 19787270762SStephen Hemminger } 19887270762SStephen Hemminger 19987270762SStephen Hemminger 20087270762SStephen Hemminger /* 20187270762SStephen Hemminger * TCP Westwood 20287270762SStephen Hemminger * Here limit is evaluated as Bw estimation*RTTmin (for obtaining it 20387270762SStephen Hemminger * in packets we use mss_cache). Rttmin is guaranteed to be >= 2 20487270762SStephen Hemminger * so avoids ever returning 0. 20587270762SStephen Hemminger */ 20672dc5b92SStephen Hemminger static u32 tcp_westwood_bw_rttmin(const struct sock *sk) 20787270762SStephen Hemminger { 20872dc5b92SStephen Hemminger const struct tcp_sock *tp = tcp_sk(sk); 20972dc5b92SStephen Hemminger const struct westwood *w = inet_csk_ca(sk); 21072dc5b92SStephen Hemminger return max_t(u32, (w->bw_est * w->rtt_min) / tp->mss_cache, 2); 21187270762SStephen Hemminger } 21287270762SStephen Hemminger 2136687e988SArnaldo Carvalho de Melo static void tcp_westwood_event(struct sock *sk, enum tcp_ca_event event) 21487270762SStephen Hemminger { 2156687e988SArnaldo Carvalho de Melo struct tcp_sock *tp = tcp_sk(sk); 2166687e988SArnaldo Carvalho de Melo struct westwood *w = inet_csk_ca(sk); 21787270762SStephen Hemminger 21887270762SStephen Hemminger switch(event) { 21987270762SStephen Hemminger case CA_EVENT_FAST_ACK: 2206687e988SArnaldo Carvalho de Melo westwood_fast_bw(sk); 22187270762SStephen Hemminger break; 22287270762SStephen Hemminger 22387270762SStephen Hemminger case CA_EVENT_COMPLETE_CWR: 22472dc5b92SStephen Hemminger tp->snd_cwnd = tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk); 22587270762SStephen Hemminger break; 22687270762SStephen Hemminger 22787270762SStephen Hemminger case CA_EVENT_FRTO: 22872dc5b92SStephen Hemminger tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk); 22987270762SStephen Hemminger break; 23087270762SStephen Hemminger 23187270762SStephen Hemminger case CA_EVENT_SLOW_ACK: 2326687e988SArnaldo Carvalho de Melo westwood_update_window(sk); 2336687e988SArnaldo Carvalho de Melo w->bk += westwood_acked_count(sk); 23487270762SStephen Hemminger w->rtt_min = min(w->rtt, w->rtt_min); 23587270762SStephen Hemminger break; 23687270762SStephen Hemminger 23787270762SStephen Hemminger default: 23887270762SStephen Hemminger /* don't care */ 23987270762SStephen Hemminger break; 24087270762SStephen Hemminger } 24187270762SStephen Hemminger } 24287270762SStephen Hemminger 24387270762SStephen Hemminger 24487270762SStephen Hemminger /* Extract info for Tcp socket info provided via netlink. */ 2456687e988SArnaldo Carvalho de Melo static void tcp_westwood_info(struct sock *sk, u32 ext, 24687270762SStephen Hemminger struct sk_buff *skb) 24787270762SStephen Hemminger { 2486687e988SArnaldo Carvalho de Melo const struct westwood *ca = inet_csk_ca(sk); 24973c1f4a0SArnaldo Carvalho de Melo if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) { 25087270762SStephen Hemminger struct rtattr *rta; 25187270762SStephen Hemminger struct tcpvegas_info *info; 25287270762SStephen Hemminger 25373c1f4a0SArnaldo Carvalho de Melo rta = __RTA_PUT(skb, INET_DIAG_VEGASINFO, sizeof(*info)); 25487270762SStephen Hemminger info = RTA_DATA(rta); 25587270762SStephen Hemminger info->tcpv_enabled = 1; 25687270762SStephen Hemminger info->tcpv_rttcnt = 0; 25787270762SStephen Hemminger info->tcpv_rtt = jiffies_to_usecs(ca->rtt); 25887270762SStephen Hemminger info->tcpv_minrtt = jiffies_to_usecs(ca->rtt_min); 25987270762SStephen Hemminger rtattr_failure: ; 26087270762SStephen Hemminger } 26187270762SStephen Hemminger } 26287270762SStephen Hemminger 26387270762SStephen Hemminger 26487270762SStephen Hemminger static struct tcp_congestion_ops tcp_westwood = { 26587270762SStephen Hemminger .init = tcp_westwood_init, 26687270762SStephen Hemminger .ssthresh = tcp_reno_ssthresh, 26787270762SStephen Hemminger .cong_avoid = tcp_reno_cong_avoid, 26872dc5b92SStephen Hemminger .min_cwnd = tcp_westwood_bw_rttmin, 26987270762SStephen Hemminger .cwnd_event = tcp_westwood_event, 27087270762SStephen Hemminger .get_info = tcp_westwood_info, 27187270762SStephen Hemminger .pkts_acked = tcp_westwood_pkts_acked, 27287270762SStephen Hemminger 27387270762SStephen Hemminger .owner = THIS_MODULE, 27487270762SStephen Hemminger .name = "westwood" 27587270762SStephen Hemminger }; 27687270762SStephen Hemminger 27787270762SStephen Hemminger static int __init tcp_westwood_register(void) 27887270762SStephen Hemminger { 2796687e988SArnaldo Carvalho de Melo BUG_ON(sizeof(struct westwood) > ICSK_CA_PRIV_SIZE); 28087270762SStephen Hemminger return tcp_register_congestion_control(&tcp_westwood); 28187270762SStephen Hemminger } 28287270762SStephen Hemminger 28387270762SStephen Hemminger static void __exit tcp_westwood_unregister(void) 28487270762SStephen Hemminger { 28587270762SStephen Hemminger tcp_unregister_congestion_control(&tcp_westwood); 28687270762SStephen Hemminger } 28787270762SStephen Hemminger 28887270762SStephen Hemminger module_init(tcp_westwood_register); 28987270762SStephen Hemminger module_exit(tcp_westwood_unregister); 29087270762SStephen Hemminger 29187270762SStephen Hemminger MODULE_AUTHOR("Stephen Hemminger, Angelo Dell'Aera"); 29287270762SStephen Hemminger MODULE_LICENSE("GPL"); 29387270762SStephen Hemminger MODULE_DESCRIPTION("TCP Westwood+"); 294