1*f7220c48SRichard Scheffenegger /*- 2*f7220c48SRichard Scheffenegger * SPDX-License-Identifier: BSD-3-Clause 3*f7220c48SRichard Scheffenegger * 4*f7220c48SRichard Scheffenegger * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995 5*f7220c48SRichard Scheffenegger * The Regents of the University of California. All rights reserved. 6*f7220c48SRichard Scheffenegger * Copyright (c) 2007-2008,2010 7*f7220c48SRichard Scheffenegger * Swinburne University of Technology, Melbourne, Australia. 8*f7220c48SRichard Scheffenegger * Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org> 9*f7220c48SRichard Scheffenegger * Copyright (c) 2010 The FreeBSD Foundation 10*f7220c48SRichard Scheffenegger * Copyright (c) 2010-2011 Juniper Networks, Inc. 11*f7220c48SRichard Scheffenegger * Copyright (c) 2019 Richard Scheffenegger <srichard@netapp.com> 12*f7220c48SRichard Scheffenegger * All rights reserved. 13*f7220c48SRichard Scheffenegger * 14*f7220c48SRichard Scheffenegger * Portions of this software were developed at the Centre for Advanced Internet 15*f7220c48SRichard Scheffenegger * Architectures, Swinburne University of Technology, by Lawrence Stewart, 16*f7220c48SRichard Scheffenegger * James Healy and David Hayes, made possible in part by a grant from the Cisco 17*f7220c48SRichard Scheffenegger * University Research Program Fund at Community Foundation Silicon Valley. 18*f7220c48SRichard Scheffenegger * 19*f7220c48SRichard Scheffenegger * Portions of this software were developed at the Centre for Advanced 20*f7220c48SRichard Scheffenegger * Internet Architectures, Swinburne University of Technology, Melbourne, 21*f7220c48SRichard Scheffenegger * Australia by David Hayes under sponsorship from the FreeBSD Foundation. 22*f7220c48SRichard Scheffenegger * 23*f7220c48SRichard Scheffenegger * Portions of this software were developed by Robert N. M. Watson under 24*f7220c48SRichard Scheffenegger * contract to Juniper Networks, Inc. 25*f7220c48SRichard Scheffenegger * 26*f7220c48SRichard Scheffenegger * Redistribution and use in source and binary forms, with or without 27*f7220c48SRichard Scheffenegger * modification, are permitted provided that the following conditions 28*f7220c48SRichard Scheffenegger * are met: 29*f7220c48SRichard Scheffenegger * 1. Redistributions of source code must retain the above copyright 30*f7220c48SRichard Scheffenegger * notice, this list of conditions and the following disclaimer. 31*f7220c48SRichard Scheffenegger * 2. Redistributions in binary form must reproduce the above copyright 32*f7220c48SRichard Scheffenegger * notice, this list of conditions and the following disclaimer in the 33*f7220c48SRichard Scheffenegger * documentation and/or other materials provided with the distribution. 34*f7220c48SRichard Scheffenegger * 3. Neither the name of the University nor the names of its contributors 35*f7220c48SRichard Scheffenegger * may be used to endorse or promote products derived from this software 36*f7220c48SRichard Scheffenegger * without specific prior written permission. 37*f7220c48SRichard Scheffenegger * 38*f7220c48SRichard Scheffenegger * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 39*f7220c48SRichard Scheffenegger * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 40*f7220c48SRichard Scheffenegger * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 41*f7220c48SRichard Scheffenegger * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 42*f7220c48SRichard Scheffenegger * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 43*f7220c48SRichard Scheffenegger * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 44*f7220c48SRichard Scheffenegger * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 45*f7220c48SRichard Scheffenegger * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 46*f7220c48SRichard Scheffenegger * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 47*f7220c48SRichard Scheffenegger * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 48*f7220c48SRichard Scheffenegger * SUCH DAMAGE. 49*f7220c48SRichard Scheffenegger * 50*f7220c48SRichard Scheffenegger * @(#)tcp_ecn.c 8.12 (Berkeley) 5/24/95 51*f7220c48SRichard Scheffenegger */ 52*f7220c48SRichard Scheffenegger 53*f7220c48SRichard Scheffenegger /* 54*f7220c48SRichard Scheffenegger * Utility functions to deal with Explicit Congestion Notification in TCP 55*f7220c48SRichard Scheffenegger * implementing the essential parts of the Accurate ECN extension 56*f7220c48SRichard Scheffenegger * https://tools.ietf.org/html/draft-ietf-tcpm-accurate-ecn-09 57*f7220c48SRichard Scheffenegger */ 58*f7220c48SRichard Scheffenegger 59*f7220c48SRichard Scheffenegger #include <sys/cdefs.h> 60*f7220c48SRichard Scheffenegger __FBSDID("$FreeBSD$"); 61*f7220c48SRichard Scheffenegger 62*f7220c48SRichard Scheffenegger #include "opt_inet.h" 63*f7220c48SRichard Scheffenegger #include "opt_inet6.h" 64*f7220c48SRichard Scheffenegger #include "opt_tcpdebug.h" 65*f7220c48SRichard Scheffenegger 66*f7220c48SRichard Scheffenegger #include <sys/param.h> 67*f7220c48SRichard Scheffenegger #include <sys/systm.h> 68*f7220c48SRichard Scheffenegger #include <sys/kernel.h> 69*f7220c48SRichard Scheffenegger #include <sys/sysctl.h> 70*f7220c48SRichard Scheffenegger #include <sys/malloc.h> 71*f7220c48SRichard Scheffenegger #include <sys/mbuf.h> 72*f7220c48SRichard Scheffenegger #include <sys/socket.h> 73*f7220c48SRichard Scheffenegger #include <sys/socketvar.h> 74*f7220c48SRichard Scheffenegger 75*f7220c48SRichard Scheffenegger #include <machine/cpu.h> 76*f7220c48SRichard Scheffenegger 77*f7220c48SRichard Scheffenegger #include <vm/uma.h> 78*f7220c48SRichard Scheffenegger 79*f7220c48SRichard Scheffenegger #include <net/if.h> 80*f7220c48SRichard Scheffenegger #include <net/if_var.h> 81*f7220c48SRichard Scheffenegger #include <net/route.h> 82*f7220c48SRichard Scheffenegger #include <net/vnet.h> 83*f7220c48SRichard Scheffenegger 84*f7220c48SRichard Scheffenegger #include <netinet/in.h> 85*f7220c48SRichard Scheffenegger #include <netinet/in_systm.h> 86*f7220c48SRichard Scheffenegger #include <netinet/ip.h> 87*f7220c48SRichard Scheffenegger #include <netinet/in_var.h> 88*f7220c48SRichard Scheffenegger #include <netinet/in_pcb.h> 89*f7220c48SRichard Scheffenegger #include <netinet/ip_var.h> 90*f7220c48SRichard Scheffenegger #include <netinet/ip6.h> 91*f7220c48SRichard Scheffenegger #include <netinet/icmp6.h> 92*f7220c48SRichard Scheffenegger #include <netinet6/nd6.h> 93*f7220c48SRichard Scheffenegger #include <netinet6/ip6_var.h> 94*f7220c48SRichard Scheffenegger #include <netinet6/in6_pcb.h> 95*f7220c48SRichard Scheffenegger #include <netinet/tcp.h> 96*f7220c48SRichard Scheffenegger #include <netinet/tcp_fsm.h> 97*f7220c48SRichard Scheffenegger #include <netinet/tcp_seq.h> 98*f7220c48SRichard Scheffenegger #include <netinet/tcp_var.h> 99*f7220c48SRichard Scheffenegger #include <netinet/tcp_syncache.h> 100*f7220c48SRichard Scheffenegger #include <netinet/tcp_timer.h> 101*f7220c48SRichard Scheffenegger #include <netinet6/tcp6_var.h> 102*f7220c48SRichard Scheffenegger #include <netinet/tcpip.h> 103*f7220c48SRichard Scheffenegger #include <netinet/tcp_ecn.h> 104*f7220c48SRichard Scheffenegger 105*f7220c48SRichard Scheffenegger 106*f7220c48SRichard Scheffenegger /* 107*f7220c48SRichard Scheffenegger * Process incoming SYN,ACK packet 108*f7220c48SRichard Scheffenegger */ 109*f7220c48SRichard Scheffenegger void 110*f7220c48SRichard Scheffenegger tcp_ecn_input_syn_sent(struct tcpcb *tp, uint16_t thflags, int iptos) 111*f7220c48SRichard Scheffenegger { 112*f7220c48SRichard Scheffenegger thflags &= (TH_CWR|TH_ECE); 113*f7220c48SRichard Scheffenegger 114*f7220c48SRichard Scheffenegger if (((thflags & (TH_CWR | TH_ECE)) == TH_ECE) && 115*f7220c48SRichard Scheffenegger V_tcp_do_ecn) { 116*f7220c48SRichard Scheffenegger tp->t_flags2 |= TF2_ECN_PERMIT; 117*f7220c48SRichard Scheffenegger KMOD_TCPSTAT_INC(tcps_ecn_shs); 118*f7220c48SRichard Scheffenegger } 119*f7220c48SRichard Scheffenegger } 120*f7220c48SRichard Scheffenegger 121*f7220c48SRichard Scheffenegger /* 122*f7220c48SRichard Scheffenegger * Handle parallel SYN for ECN 123*f7220c48SRichard Scheffenegger */ 124*f7220c48SRichard Scheffenegger void 125*f7220c48SRichard Scheffenegger tcp_ecn_input_parallel_syn(struct tcpcb *tp, uint16_t thflags, int iptos) 126*f7220c48SRichard Scheffenegger { 127*f7220c48SRichard Scheffenegger if (thflags & TH_ACK) 128*f7220c48SRichard Scheffenegger return; 129*f7220c48SRichard Scheffenegger if (V_tcp_do_ecn == 0) 130*f7220c48SRichard Scheffenegger return; 131*f7220c48SRichard Scheffenegger if ((V_tcp_do_ecn == 1) || (V_tcp_do_ecn == 2)) { 132*f7220c48SRichard Scheffenegger /* RFC3168 ECN handling */ 133*f7220c48SRichard Scheffenegger if ((thflags & (TH_CWR | TH_ECE)) == (TH_CWR | TH_ECE)) { 134*f7220c48SRichard Scheffenegger tp->t_flags2 |= TF2_ECN_PERMIT; 135*f7220c48SRichard Scheffenegger tp->t_flags2 |= TF2_ECN_SND_ECE; 136*f7220c48SRichard Scheffenegger KMOD_TCPSTAT_INC(tcps_ecn_shs); 137*f7220c48SRichard Scheffenegger } 138*f7220c48SRichard Scheffenegger } 139*f7220c48SRichard Scheffenegger } 140*f7220c48SRichard Scheffenegger 141*f7220c48SRichard Scheffenegger /* 142*f7220c48SRichard Scheffenegger * TCP ECN processing. 143*f7220c48SRichard Scheffenegger */ 144*f7220c48SRichard Scheffenegger int 145*f7220c48SRichard Scheffenegger tcp_ecn_input_segment(struct tcpcb *tp, uint16_t thflags, int iptos) 146*f7220c48SRichard Scheffenegger { 147*f7220c48SRichard Scheffenegger int delta_ace = 0; 148*f7220c48SRichard Scheffenegger 149*f7220c48SRichard Scheffenegger if (tp->t_flags2 & TF2_ECN_PERMIT) { 150*f7220c48SRichard Scheffenegger switch (iptos & IPTOS_ECN_MASK) { 151*f7220c48SRichard Scheffenegger case IPTOS_ECN_CE: 152*f7220c48SRichard Scheffenegger KMOD_TCPSTAT_INC(tcps_ecn_ce); 153*f7220c48SRichard Scheffenegger break; 154*f7220c48SRichard Scheffenegger case IPTOS_ECN_ECT0: 155*f7220c48SRichard Scheffenegger KMOD_TCPSTAT_INC(tcps_ecn_ect0); 156*f7220c48SRichard Scheffenegger break; 157*f7220c48SRichard Scheffenegger case IPTOS_ECN_ECT1: 158*f7220c48SRichard Scheffenegger KMOD_TCPSTAT_INC(tcps_ecn_ect1); 159*f7220c48SRichard Scheffenegger break; 160*f7220c48SRichard Scheffenegger } 161*f7220c48SRichard Scheffenegger 162*f7220c48SRichard Scheffenegger /* RFC3168 ECN handling */ 163*f7220c48SRichard Scheffenegger if (thflags & TH_ECE) 164*f7220c48SRichard Scheffenegger delta_ace = 1; 165*f7220c48SRichard Scheffenegger if (thflags & TH_CWR) { 166*f7220c48SRichard Scheffenegger tp->t_flags2 &= ~TF2_ECN_SND_ECE; 167*f7220c48SRichard Scheffenegger tp->t_flags |= TF_ACKNOW; 168*f7220c48SRichard Scheffenegger } 169*f7220c48SRichard Scheffenegger if ((iptos & IPTOS_ECN_MASK) == IPTOS_ECN_CE) 170*f7220c48SRichard Scheffenegger tp->t_flags2 |= TF2_ECN_SND_ECE; 171*f7220c48SRichard Scheffenegger 172*f7220c48SRichard Scheffenegger /* Process a packet differently from RFC3168. */ 173*f7220c48SRichard Scheffenegger cc_ecnpkt_handler_flags(tp, thflags, iptos); 174*f7220c48SRichard Scheffenegger } 175*f7220c48SRichard Scheffenegger 176*f7220c48SRichard Scheffenegger return delta_ace; 177*f7220c48SRichard Scheffenegger } 178*f7220c48SRichard Scheffenegger 179*f7220c48SRichard Scheffenegger /* 180*f7220c48SRichard Scheffenegger * Send ECN setup <SYN> packet header flags 181*f7220c48SRichard Scheffenegger */ 182*f7220c48SRichard Scheffenegger uint16_t 183*f7220c48SRichard Scheffenegger tcp_ecn_output_syn_sent(struct tcpcb *tp) 184*f7220c48SRichard Scheffenegger { 185*f7220c48SRichard Scheffenegger uint16_t thflags = 0; 186*f7220c48SRichard Scheffenegger 187*f7220c48SRichard Scheffenegger if (V_tcp_do_ecn == 1) { 188*f7220c48SRichard Scheffenegger /* Send a RFC3168 ECN setup <SYN> packet */ 189*f7220c48SRichard Scheffenegger if (tp->t_rxtshift >= 1) { 190*f7220c48SRichard Scheffenegger if (tp->t_rxtshift <= V_tcp_ecn_maxretries) 191*f7220c48SRichard Scheffenegger thflags = TH_ECE|TH_CWR; 192*f7220c48SRichard Scheffenegger } else 193*f7220c48SRichard Scheffenegger thflags = TH_ECE|TH_CWR; 194*f7220c48SRichard Scheffenegger } 195*f7220c48SRichard Scheffenegger 196*f7220c48SRichard Scheffenegger return thflags; 197*f7220c48SRichard Scheffenegger } 198*f7220c48SRichard Scheffenegger 199*f7220c48SRichard Scheffenegger /* 200*f7220c48SRichard Scheffenegger * output processing of ECN feature 201*f7220c48SRichard Scheffenegger * returning IP ECN header codepoint 202*f7220c48SRichard Scheffenegger */ 203*f7220c48SRichard Scheffenegger int 204*f7220c48SRichard Scheffenegger tcp_ecn_output_established(struct tcpcb *tp, uint16_t *thflags, int len) 205*f7220c48SRichard Scheffenegger { 206*f7220c48SRichard Scheffenegger int ipecn = IPTOS_ECN_NOTECT; 207*f7220c48SRichard Scheffenegger bool newdata; 208*f7220c48SRichard Scheffenegger 209*f7220c48SRichard Scheffenegger /* 210*f7220c48SRichard Scheffenegger * If the peer has ECN, mark data packets with 211*f7220c48SRichard Scheffenegger * ECN capable transmission (ECT). 212*f7220c48SRichard Scheffenegger * Ignore pure control packets, retransmissions 213*f7220c48SRichard Scheffenegger * and window probes. 214*f7220c48SRichard Scheffenegger */ 215*f7220c48SRichard Scheffenegger newdata = (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) && 216*f7220c48SRichard Scheffenegger !((tp->t_flags & TF_FORCEDATA) && len == 1)); 217*f7220c48SRichard Scheffenegger if (newdata) { 218*f7220c48SRichard Scheffenegger ipecn = IPTOS_ECN_ECT0; 219*f7220c48SRichard Scheffenegger KMOD_TCPSTAT_INC(tcps_ecn_ect0); 220*f7220c48SRichard Scheffenegger } 221*f7220c48SRichard Scheffenegger /* 222*f7220c48SRichard Scheffenegger * Reply with proper ECN notifications. 223*f7220c48SRichard Scheffenegger */ 224*f7220c48SRichard Scheffenegger if (newdata && 225*f7220c48SRichard Scheffenegger (tp->t_flags2 & TF2_ECN_SND_CWR)) { 226*f7220c48SRichard Scheffenegger *thflags |= TH_CWR; 227*f7220c48SRichard Scheffenegger tp->t_flags2 &= ~TF2_ECN_SND_CWR; 228*f7220c48SRichard Scheffenegger } 229*f7220c48SRichard Scheffenegger if (tp->t_flags2 & TF2_ECN_SND_ECE) 230*f7220c48SRichard Scheffenegger *thflags |= TH_ECE; 231*f7220c48SRichard Scheffenegger 232*f7220c48SRichard Scheffenegger return ipecn; 233*f7220c48SRichard Scheffenegger } 234*f7220c48SRichard Scheffenegger 235*f7220c48SRichard Scheffenegger /* 236*f7220c48SRichard Scheffenegger * Set up the ECN related tcpcb fields from 237*f7220c48SRichard Scheffenegger * a syncache entry 238*f7220c48SRichard Scheffenegger */ 239*f7220c48SRichard Scheffenegger void 240*f7220c48SRichard Scheffenegger tcp_ecn_syncache_socket(struct tcpcb *tp, struct syncache *sc) 241*f7220c48SRichard Scheffenegger { 242*f7220c48SRichard Scheffenegger if (sc->sc_flags & SCF_ECN) { 243*f7220c48SRichard Scheffenegger switch (sc->sc_flags & SCF_ECN) { 244*f7220c48SRichard Scheffenegger case SCF_ECN: 245*f7220c48SRichard Scheffenegger tp->t_flags2 |= TF2_ECN_PERMIT; 246*f7220c48SRichard Scheffenegger break; 247*f7220c48SRichard Scheffenegger /* undefined SCF codepoint */ 248*f7220c48SRichard Scheffenegger default: 249*f7220c48SRichard Scheffenegger break; 250*f7220c48SRichard Scheffenegger } 251*f7220c48SRichard Scheffenegger } 252*f7220c48SRichard Scheffenegger } 253*f7220c48SRichard Scheffenegger 254*f7220c48SRichard Scheffenegger /* 255*f7220c48SRichard Scheffenegger * Process a <SYN> packets ECN information, and provide the 256*f7220c48SRichard Scheffenegger * syncache with the relevant information. 257*f7220c48SRichard Scheffenegger */ 258*f7220c48SRichard Scheffenegger int 259*f7220c48SRichard Scheffenegger tcp_ecn_syncache_add(uint16_t thflags, int iptos) 260*f7220c48SRichard Scheffenegger { 261*f7220c48SRichard Scheffenegger int scflags = 0; 262*f7220c48SRichard Scheffenegger 263*f7220c48SRichard Scheffenegger switch (thflags & (TH_CWR|TH_ECE)) { 264*f7220c48SRichard Scheffenegger /* no ECN */ 265*f7220c48SRichard Scheffenegger case (0|0): 266*f7220c48SRichard Scheffenegger break; 267*f7220c48SRichard Scheffenegger /* legacy ECN */ 268*f7220c48SRichard Scheffenegger case (TH_CWR|TH_ECE): 269*f7220c48SRichard Scheffenegger scflags = SCF_ECN; 270*f7220c48SRichard Scheffenegger break; 271*f7220c48SRichard Scheffenegger default: 272*f7220c48SRichard Scheffenegger break; 273*f7220c48SRichard Scheffenegger } 274*f7220c48SRichard Scheffenegger return scflags; 275*f7220c48SRichard Scheffenegger } 276*f7220c48SRichard Scheffenegger 277*f7220c48SRichard Scheffenegger /* 278*f7220c48SRichard Scheffenegger * Set up the ECN information for the <SYN,ACK> from 279*f7220c48SRichard Scheffenegger * syncache information. 280*f7220c48SRichard Scheffenegger */ 281*f7220c48SRichard Scheffenegger uint16_t 282*f7220c48SRichard Scheffenegger tcp_ecn_syncache_respond(uint16_t thflags, struct syncache *sc) 283*f7220c48SRichard Scheffenegger { 284*f7220c48SRichard Scheffenegger if ((thflags & TH_SYN) && 285*f7220c48SRichard Scheffenegger (sc->sc_flags & SCF_ECN)) { 286*f7220c48SRichard Scheffenegger switch (sc->sc_flags & SCF_ECN) { 287*f7220c48SRichard Scheffenegger case SCF_ECN: 288*f7220c48SRichard Scheffenegger thflags |= (0 | TH_ECE); 289*f7220c48SRichard Scheffenegger KMOD_TCPSTAT_INC(tcps_ecn_shs); 290*f7220c48SRichard Scheffenegger break; 291*f7220c48SRichard Scheffenegger /* undefined SCF codepoint */ 292*f7220c48SRichard Scheffenegger default: 293*f7220c48SRichard Scheffenegger break; 294*f7220c48SRichard Scheffenegger } 295*f7220c48SRichard Scheffenegger } 296*f7220c48SRichard Scheffenegger return thflags; 297*f7220c48SRichard Scheffenegger } 298