/*-
 * Copyright (c) 2007-2008
 *	Swinburne University of Technology, Melbourne, Australia.
 * Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org>
 * Copyright (c) 2010 The FreeBSD Foundation
 * All rights reserved.
 *
 * This software was developed at the Centre for Advanced Internet
 * Architectures, Swinburne University of Technology, by Lawrence Stewart and
 * James Healy, made possible in part by a grant from the Cisco University
 * Research Program Fund at Community Foundation Silicon Valley.
 *
 * Portions of this software were developed at the Centre for Advanced
 * Internet Architectures, Swinburne University of Technology, Melbourne,
 * Australia by David Hayes under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * This software was first released in 2007 by James Healy and Lawrence Stewart
 * whilst working on the NewTCP research project at Swinburne University of
 * Technology's Centre for Advanced Internet Architectures, Melbourne,
 * Australia, which was made possible in part by a grant from the Cisco
 * University Research Program Fund at Community Foundation Silicon Valley.
 * More details are available at:
 *   http://caia.swin.edu.au/urp/newtcp/
 */

/*
 * Interface between transports and pluggable congestion control (CC)
 * algorithms: the per-connection state wrapper (struct cc_var), the event
 * types passed to the hooks (ccsignal_t), the algorithm descriptor
 * (struct cc_algo) and the kernel-only registration, locking and NewReno
 * helper declarations.
 *
 * Visibility:
 *   - always:                   congestion signal members of ccsignal_t
 *   - _KERNEL or _WANT_TCPCB:   struct cc_var, CCF_* flags, ACK types
 *   - _KERNEL only:             everything else
 *
 * No headers are included from here; the including file is expected to
 * have already provided the types and macros used below (e.g. tcp_seq,
 * STAILQ_*, VNET_DECLARE, MALLOC_DECLARE, TCP_CA_NAME_MAX, struct rwlock).
 */

#ifndef _NETINET_CC_CC_H_
#define _NETINET_CC_CC_H_

#ifdef _KERNEL

MALLOC_DECLARE(M_CC_MEM);

/* Global CC vars. */
extern	STAILQ_HEAD(cc_head, cc_algo) cc_list;
extern const int tcprexmtthresh;

/* Per-netstack bits. */
VNET_DECLARE(struct cc_algo *, default_cc_ptr);
#define	V_default_cc_ptr	VNET(default_cc_ptr)

VNET_DECLARE(int, cc_do_abe);
#define	V_cc_do_abe		VNET(cc_do_abe)

VNET_DECLARE(int, cc_abe_frlossreduce);
#define	V_cc_abe_frlossreduce	VNET(cc_abe_frlossreduce)

/* Define the new net.inet.tcp.cc sysctl tree. */
#ifdef _SYS_SYSCTL_H_
SYSCTL_DECL(_net_inet_tcp_cc);
#endif

/* For CC modules that use hystart++ */
extern uint32_t hystart_lowcwnd;
extern uint32_t hystart_minrtt_thresh;
extern uint32_t hystart_maxrtt_thresh;
extern uint32_t hystart_n_rttsamples;
extern uint32_t hystart_css_growth_div;
extern uint32_t hystart_css_rounds;
extern uint32_t hystart_bblogs;

/* CC housekeeping functions. */
int	cc_register_algo(struct cc_algo *add_cc);
int	cc_deregister_algo(struct cc_algo *remove_cc);
#endif /* _KERNEL */

#if defined(_KERNEL) || defined(_WANT_TCPCB)
/*
 * Wrapper around transport structs that contain same-named congestion
 * control variables. Allows algos to be shared amongst multiple CC aware
 * transports.
 */
struct cc_var {
	void		*cc_data; /* Per-connection private CC algorithm data. */
	int		bytes_this_ack; /* # bytes acked by the current ACK. */
	tcp_seq		curack; /* Most recent ACK. */
	uint32_t	flags; /* Flags for cc_var (see below) */
	int		type; /* Indicates which ptr is valid in ccvc. */
	union ccv_container {
		struct tcpcb		*tcp;
		struct sctp_nets	*sctp;
	} ccvc;
	uint16_t	nsegs; /* # segments coalesced into current chain. */
	uint8_t		labc;  /* Don't use system abc, use passed in value */
};

/* cc_var flags. */
#define	CCF_ABC_SENTAWND	0x0001	/* ABC counted cwnd worth of bytes? */
#define	CCF_CWND_LIMITED	0x0002	/* Are we currently cwnd limited? */
#define	CCF_USE_LOCAL_ABC	0x0004	/* Don't use the system l_abc val */
#define	CCF_ACKNOW		0x0008	/* Will this ack be sent now? */
#define	CCF_IPHDR_CE		0x0010	/* Does this packet set CE bit? */
#define	CCF_TCPHDR_CWR		0x0020	/* Does this packet set CWR bit? */
#define	CCF_MAX_CWND		0x0040	/* Have we reached maximum cwnd? */
#define	CCF_CHG_MAX_CWND	0x0080	/* CUBIC max_cwnd changed, for K */
#define	CCF_USR_IWND		0x0100	/* User specified initial window */
#define	CCF_USR_IWND_INIT_NSEG	0x0200	/* Convert segs to bytes on conn init */
#define	CCF_HYSTART_ALLOWED	0x0400	/* If the CC supports it Hystart is allowed */
#define	CCF_HYSTART_CAN_SH_CWND	0x0800	/* Can hystart when going CSS -> CA slam the cwnd */
#define	CCF_HYSTART_CONS_SSTH	0x1000	/* Should hystart use the more conservative ssthresh */

#endif /* defined(_KERNEL) || defined(_WANT_TCPCB) */

/*
 * Event types handed to the CC algorithm hooks.  The ACK types are only
 * visible to the kernel and to _WANT_TCPCB consumers; the congestion
 * signal types and CC_SIGPRIVMASK are always visible.
 *
 * NOTE(review): CC_SIGPRIVMASK (0xFF000000) does not fit in a signed
 * 32-bit int, so this enum relies on the compiler accepting enumerator
 * values outside the range of int.
 */
typedef enum {
#if defined(_KERNEL) || defined(_WANT_TCPCB)
	/* ACK types passed to the ack_received() hook. */
	CC_ACK =	0x0001,	/* Regular in sequence ACK. */
	CC_DUPACK =	0x0002,	/* Duplicate ACK. */
	CC_PARTIALACK =	0x0004,	/* Not yet. */
	CC_SACK =	0x0008,	/* Not yet. */
#endif /* defined(_KERNEL) || defined(_WANT_TCPCB) */
	/* Congestion signal types passed to the cong_signal() hook. */
	CC_ECN =	0x0100,	/* ECN marked packet received. */
	CC_RTO =	0x0200,	/* RTO fired. */
	CC_RTO_ERR =	0x0400,	/* RTO fired in error. */
	CC_NDUPACK =	0x0800,	/* Threshold of dupack's reached. */
	/*
	 * The highest order 8 bits (0x01000000 - 0x80000000) are reserved
	 * for CC algos to declare their own congestion signal types.
	 */
	CC_SIGPRIVMASK = 0xFF000000	/* Mask to check if sig is private. */
} ccsignal_t;

#ifdef _KERNEL
/*
 * Structure to hold data and function pointers that together represent a
 * congestion control algorithm.
 */
struct cc_algo {
	char	name[TCP_CA_NAME_MAX];

	/* Init global module state on kldload. */
	int	(*mod_init)(void);

	/* Cleanup global module state on kldunload. */
	int	(*mod_destroy)(void);

	/* Return the size of the void pointer the CC needs for state */
	size_t  (*cc_data_sz)(void);

	/*
	 * Init CC state for a new control block. The CC
	 * module may be passed a NULL ptr indicating that
	 * it must allocate the memory. If it is passed a
	 * non-null pointer it is pre-allocated memory by
	 * the caller and the cb_init is expected to use that memory.
	 * It is not expected to fail if memory is passed in and
	 * all currently defined modules do not.
	 */
	int	(*cb_init)(struct cc_var *ccv, void *ptr);

	/* Cleanup CC state for a terminating control block. */
	void	(*cb_destroy)(struct cc_var *ccv);

	/* Init variables for a newly established connection. */
	void	(*conn_init)(struct cc_var *ccv);

	/* Called on receipt of an ack. */
	void	(*ack_received)(struct cc_var *ccv, ccsignal_t type);

	/* Called on detection of a congestion signal. */
	void	(*cong_signal)(struct cc_var *ccv, ccsignal_t type);

	/* Called after exiting congestion recovery. */
	void	(*post_recovery)(struct cc_var *ccv);

	/* Called when data transfer resumes after an idle period. */
	void	(*after_idle)(struct cc_var *ccv);

	/* Called for additional ECN processing apart from RFC3168. */
	void	(*ecnpkt_handler)(struct cc_var *ccv);

	/* Called when a new "round" begins, if the transport is tracking rounds. */
	void	(*newround)(struct cc_var *ccv, uint32_t round_cnt);

	/*
	 * Called when a RTT sample is made (fas = flight at send, if you
	 * don't have it send the cwnd in).
	 */
	void	(*rttsample)(struct cc_var *ccv, uint32_t usec_rtt, uint32_t rxtcnt, uint32_t fas);

	/* Called for {get|set}sockopt() on a TCP socket with TCP_CCALGOOPT. */
	int     (*ctl_output)(struct cc_var *, struct sockopt *, void *);

	/* Linkage for STAILQs of cc_algo (cc_list is declared as one). */
	STAILQ_ENTRY (cc_algo) entries;
	/*
	 * NOTE(review): presumably the count of holds taken via
	 * cc_refer()/cc_attach() -- confirm against the implementation.
	 */
	u_int	cc_refcount;
	/* Module state bits; see CC_MODULE_BEING_REMOVED. */
	uint8_t	flags;
};

#define	CC_MODULE_BEING_REMOVED		0x01	/* The module is being removed */

/* Macro to obtain the CC algo's struct ptr. */
#define	CC_ALGO(tp)	((tp)->t_cc)

/* Macro to obtain the CC algo's data ptr. */
#define	CC_DATA(tp)	((tp)->t_ccv.cc_data)

/* Macro to obtain the system default CC algo's struct ptr. */
#define	CC_DEFAULT_ALGO()	V_default_cc_ptr

/*
 * Wrappers around the cc_list_lock rwlock.
 * NOTE(review): by its name this lock presumably guards cc_list -- confirm.
 */
extern struct rwlock cc_list_lock;
#define	CC_LIST_LOCK_INIT()	rw_init(&cc_list_lock, "cc_list")
#define	CC_LIST_LOCK_DESTROY()	rw_destroy(&cc_list_lock)
#define	CC_LIST_RLOCK()		rw_rlock(&cc_list_lock)
#define	CC_LIST_RUNLOCK()	rw_runlock(&cc_list_lock)
#define	CC_LIST_WLOCK()		rw_wlock(&cc_list_lock)
#define	CC_LIST_WUNLOCK()	rw_wunlock(&cc_list_lock)
#define	CC_LIST_LOCK_ASSERT()	rw_assert(&cc_list_lock, RA_LOCKED)

/*
 * NOTE(review): presumably the upper bound on the size of TCP_CCALGOOPT
 * option data -- confirm against the users of this constant.
 */
#define	CC_ALGOOPT_LIMIT	2048

/*
 * These routines give NewReno behavior to the caller
 * they require no state and can be used by any other CC
 * module that wishes to use NewReno type behaviour (along
 * with anything else they may add on, pre or post call).
 */
void newreno_cc_post_recovery(struct cc_var *);
void newreno_cc_after_idle(struct cc_var *);
void newreno_cc_cong_signal(struct cc_var *, ccsignal_t);
void newreno_cc_ack_received(struct cc_var *, ccsignal_t);

/* Called to temporarily keep an algo from going away during change */
void cc_refer(struct cc_algo *algo);
/* Called to release the temporary hold */
void cc_release(struct cc_algo *algo);

/* Called to attach a CC algorithm to a tcpcb */
void cc_attach(struct tcpcb *, struct cc_algo *);
/* Called to detach a CC algorithm from a tcpcb */
void cc_detach(struct tcpcb *);

#endif /* _KERNEL */
#endif /* _NETINET_CC_CC_H_ */