/*
 * Copyright (c) 2007-2008
 *	Swinburne University of Technology, Melbourne, Australia.
 * Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org>
 * Copyright (c) 2010 The FreeBSD Foundation
 * All rights reserved.
 * Copyright (c) 2017 by Delphix. All rights reserved.
 *
 * This software was developed at the Centre for Advanced Internet
 * Architectures, Swinburne University of Technology, by Lawrence Stewart and
 * James Healy, made possible in part by a grant from the Cisco University
 * Research Program Fund at Community Foundation Silicon Valley.
 *
 * Portions of this software were developed at the Centre for Advanced
 * Internet Architectures, Swinburne University of Technology, Melbourne,
 * Australia by David Hayes under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

/*
 * This software was first released in 2007 by James Healy and Lawrence Stewart
 * whilst working on the NewTCP research project at Swinburne University of
 * Technology's Centre for Advanced Internet Architectures, Melbourne,
 * Australia, which was made possible in part by a grant from the Cisco
 * University Research Program Fund at Community Foundation Silicon Valley.
 * More details are available at:
 *   http://caia.swin.edu.au/urp/newtcp/
 */

#ifndef _NETINET_CC_H_
#define	_NETINET_CC_H_

#if (defined(_KERNEL) || defined(_KMEMUSER))

/*
 * Headers are pulled in before the extern "C" region is opened so that they
 * are not included from inside a linkage specification.
 */
#include <netinet/tcp.h>
#include <sys/queue.h>
#include <sys/rwlock.h>

#ifdef __cplusplus
extern "C" {
#endif

#define	CC_ALGO_NAME_MAX	16	/* max congestion control name length */

#define	CC_DEFAULT_ALGO_NAME	"sunreno"

struct tcp_s;
struct sctp_s;

/*
 * CC housekeeping functions.
 *
 * NOTE(review): only the prototypes are visible here; lookup, registration
 * and return-value semantics are defined by the implementation file.
 */
extern struct cc_algo *cc_load_algo(const char *name);
extern int cc_register_algo(struct cc_algo *add_cc);
extern int cc_deregister_algo(struct cc_algo *remove_cc);

/*
 * Wrapper around transport structs that contain same-named congestion
 * control variables. Allows algos to be shared amongst multiple CC aware
 * transports.
 *
 * In theory, this code (from FreeBSD) can be used to support pluggable
 * congestion control for sctp as well as tcp.  However, the support for sctp
 * in FreeBSD is incomplete, and in practice "type" is ignored.  cc_module.h
 * provides a CCV macro which implementations can use to get a variable out of
 * the protocol-appropriate structure.
 *
 * If FreeBSD eventually does extend support for pluggable congestion control
 * to sctp, we'll need to make sure we're setting "type" appropriately or use
 * a definition of CCV that ignores it.
 */
struct cc_var {
	void		*cc_data; /* Per-connection private algorithm data. */
	int		bytes_this_ack; /* # bytes acked by the current ACK. */
	int		t_bytes_acked; /* # bytes acked during current RTT */
	tcp_seq		curack; /* Most recent ACK. */
	uint32_t	flags; /* Flags for cc_var (see below) */
	int		type; /* Indicates which ptr is valid in ccvc. */
	union ccv_container {
		struct tcp_s	*tcp;
		struct sctp_s	*sctp;
	} ccvc;
	uint16_t	nsegs; /* # segments coalesced into current chain. */
};

/*
 * cc_var flags.
 *
 * CCF_ABC_SENTAWND is set when a full congestion window of data has been ACKed
 * according to the Appropriate Byte Counting spec, defined in RFC 3465.
 */
#define	CCF_ABC_SENTAWND	0x0001	/* ABC counted cwnd worth of bytes? */
#define	CCF_CWND_LIMITED	0x0002	/* Are we currently cwnd limited? */
#define	CCF_FASTRECOVERY	0x0004	/* in NewReno Fast Recovery */
#define	CCF_WASFRECOVERY	0x0008	/* was in NewReno Fast Recovery */
#define	CCF_CONGRECOVERY	0x0010	/* congestion recovery mode */
#define	CCF_WASCRECOVERY	0x0020	/* was in congestion recovery */
/*
 * In slow-start due to a retransmission timeout. This flag is enabled for the
 * duration of the slow-start phase.
 */
#define	CCF_RTO			0x0040	/* in slow-start due to timeout */

/*
 * Helpers for testing and updating the recovery bits of a cc_var flags word.
 *
 * IN_*() evaluates to the masked bits (non-zero when any tested bit is set).
 * ENTER_*() and EXIT_*() modify their argument in place, so it must be a
 * modifiable lvalue.  The argument and the whole expansion are parenthesized
 * so the macros compose safely with surrounding operators; each macro
 * evaluates its argument exactly once.
 */
#define	IN_FASTRECOVERY(flags)		((flags) & CCF_FASTRECOVERY)
#define	ENTER_FASTRECOVERY(flags)	((flags) |= CCF_FASTRECOVERY)
#define	EXIT_FASTRECOVERY(flags)	((flags) &= ~CCF_FASTRECOVERY)

#define	IN_CONGRECOVERY(flags)		((flags) & CCF_CONGRECOVERY)
#define	ENTER_CONGRECOVERY(flags)	((flags) |= CCF_CONGRECOVERY)
#define	EXIT_CONGRECOVERY(flags)	((flags) &= ~CCF_CONGRECOVERY)

#define	IN_RECOVERY(flags) \
	((flags) & (CCF_CONGRECOVERY | CCF_FASTRECOVERY))
#define	ENTER_RECOVERY(flags) \
	((flags) |= (CCF_CONGRECOVERY | CCF_FASTRECOVERY))
#define	EXIT_RECOVERY(flags) \
	((flags) &= ~(CCF_CONGRECOVERY | CCF_FASTRECOVERY))

/*
 * ACK types passed to the ack_received() hook.
 *
 * CC_ACK is passed when an ACK acknowledges previously unACKed data.
 * CC_DUPACK is passed when a duplicate ACK is received. The conditions under
 * which an ACK is considered a duplicate ACK are defined in RFC 5681.
 */
#define	CC_ACK		0x0001	/* Regular in sequence ACK. */
#define	CC_DUPACK	0x0002	/* Duplicate ACK. */
#define	CC_PARTIALACK	0x0004	/* Not yet. */
#define	CC_SACK		0x0008	/* Not yet. */

/*
 * Congestion signal types passed to the cong_signal() hook. The highest order 8
 * bits (0x01000000 - 0x80000000) are reserved for CC algos to declare their own
 * congestion signal types.
 *
 * The congestion signals defined here cover the following situations:
 * CC_ECN:	A packet with an Explicit Congestion Notification was received
 *		See RFC 3168.
 * CC_RTO:	A round-trip timeout occurred.
 * CC_RTO_ERR:	An ACK was received for a sequence number after we fired an RTO
 *		for that sequence number
 * CC_NDUPACK:	Trigger fast retransmit based on the assumption that receiving
 *		N duplicate ACKs indicates packet loss rather than reordering.
 *		Fast retransmit is followed by fast recovery.  Fast retransmit
 *		and recovery were originally described in RFC 2581 and were
 *		updated by RFC3782 (NewReno). In both RFC2581 and RFC3782, N
 *		is 3.
 */
#define	CC_ECN		0x00000001	/* ECN marked packet received. */
#define	CC_RTO		0x00000002	/* RTO fired. */
#define	CC_RTO_ERR	0x00000004	/* RTO fired in error. */
#define	CC_NDUPACK	0x00000008	/* Threshold of dupack's reached. */

#define	CC_SIGPRIVMASK	0xFF000000	/* Mask to check if sig is private. */

/*
 * Structure to hold data and function pointers that together represent a
 * congestion control algorithm.
 */
struct cc_algo {
	char	name[CC_ALGO_NAME_MAX];

	/* Init CC state for a new control block. */
	int	(*cb_init)(struct cc_var *ccv);

	/* Cleanup CC state for a terminating control block. */
	void	(*cb_destroy)(struct cc_var *ccv);

	/* Init variables for a newly established connection. */
	void	(*conn_init)(struct cc_var *ccv);

	/* Called on receipt of an ack. */
	void	(*ack_received)(struct cc_var *ccv, uint16_t type);

	/* Called on detection of a congestion signal. */
	void	(*cong_signal)(struct cc_var *ccv, uint32_t type);

	/* Called after exiting congestion recovery. */
	void	(*post_recovery)(struct cc_var *ccv);

	/* Called when data transfer resumes after an idle period. */
	void	(*after_idle)(struct cc_var *ccv);

	/* Linkage for a singly-linked tail queue of cc_algo structures. */
	STAILQ_ENTRY(cc_algo) entries;
};

/*
 * Callback type accepted by cc_walk_algos(): it receives an opaque pointer
 * and a cc_algo and returns an int.
 *
 * NOTE(review): presumably cc_walk_algos() invokes the callback once per
 * registered algorithm, passing its second argument through as the opaque
 * pointer; confirm the iteration and return-value contract against the
 * implementation.
 */
typedef int cc_walk_func_t(void *, struct cc_algo *);
extern int cc_walk_algos(cc_walk_func_t *, void *);

/*
 * Macro to obtain the CC algo's struct ptr; tp must point to a structure
 * with a tcp_cc_algo member.
 */
#define	CC_ALGO(tp)	((tp)->tcp_cc_algo)

/*
 * Macro to obtain the CC algo's data ptr; tp must point to a structure
 * with a tcp_ccv member of type struct cc_var.
 */
#define	CC_DATA(tp)	((tp)->tcp_ccv.cc_data)

#ifdef __cplusplus
}
#endif

#endif /* (defined(_KERNEL) || defined(_KMEMUSER)) */

#endif /* _NETINET_CC_H_ */