xref: /freebsd/sys/netinet/tcp_stacks/sack_filter.h (revision fce03f85c5bfc0d73fb5c43ac1affad73efab11a)
1 #ifndef __sack_filter_h__
2 #define __sack_filter_h__
3 /*-
4  * Copyright (c) 2017-9 Netflix, Inc.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 /**
29  *
30  * The Sack filter is designed to do two functions. First, it tries to reduce
31  * the processing of sacks. Consider that often times you get something like
32  *
33  * ack 1 (sack 100:200)
34  * ack 1 (sack 100:300)
35  * ack 1 (sack 100:400)
36  *
37  * You really want to process the 100:200 and then on the next sack process
38  * only 200:300 (the new data) and then finally on the third 300:400. The filter
39  * removes from your processing routines the already processed sack information so
40  * that after the filter completes you only have "new" sacks that you have not
41  * processed. This saves computation time so you do not need to worry about
42  * previously processed sack information.
43  *
44  * The second thing that the sack filter does is help protect against malicious
45  * attackers that are trying to attack any linked lists (or other data structures)
46  * that are used in sack processing. Consider an attacker sending in sacks for
47  * every other byte of data outstanding. This could in theory drastically split
48  * up any scoreboard you are maintaining and make you search through a very large
49  * linked list (or other structure) eating up CPU. If you split far enough and
50  * fracture your data structure enough you could potentially be crippled by a malicious
51  * peer. How the filter works here is it filters out sacks that are less than an MSS.
52  * We do this because generally a packet (aka MSS) should be kept whole. The only place
53  * we allow a smaller SACK is when the SACK touches the end of our socket buffer. This allows
54  * TLP to still work properly and yet protects us from splitting. The filter also only allows
55  * a set number of splits (defined in SACK_FILTER_BLOCKS). If more than that many sack locations
56  * are being sent, we discard the additional ones until the earlier holes are filled up. The maximum
57  * the current filter can be is 15, which we have moved to since we want to be as generous as
58  * possible with allowing for loss. However, in previous testing of the filter it was found
59  * that there was very little benefit from moving from 7 to 15 sack points. In that
60  * previous set of tests, however, we would just discard earlier information in the filter. Now
61  * that we no longer discard that information and instead drop sack data, we have raised
62  * the value to the max, i.e. 15. If you want to expand beyond 15 one would have to either increase
63  * the size of the sf_bits to a uint32_t which could then get you a maximum of 31 splits or
64  * move to a true bitstring. If this is done however it further increases your risk to
65  * sack attacks, the bigger the number of splits (filter blocks) that are allowed
66  * the larger your processing arrays will grow as well as the filter.
67  *
68  * Note that this protection does not prevent an attacker from asking for a 20 byte
69  * MSS, that protection must be done elsewhere during the negotiation of the connection
70  * and is done now by just ignoring sacks from connections with too small an MSS, which
71  * prevents sack from working and thus makes the connection less efficient but protects
72  * the system from harm.
73  *
74  * We may actually want to consider dropping the size of the array back to 7 to further
75  * protect the system which would be a more cautious approach.
76  *
77  * TCP Developer information:
78  *
79  * Using the sack filter is actually pretty simple. All you do is the normal sorting
80  * and sanity checks of your sacks but then after that you call out to sack_filter_blks()
81  * passing in the tcpcb, the sack-filter you are using (memory you have allocated) the
82  * pointer to the sackblk array and how many sorted valid blocks there are as well
83  * as what the new th_ack point is. The filter will return to you the number of
84  * blocks left after filtering. It will reshape the blocks based on the previous
85  * sacks you have received and processed. If sack_filter_blks() returns 0 then no
86  * new sack data is present to be processed.
87  *
88  * Whenever you reach the point of snd_una == snd_max, you should call sack_filter_clear with
89  * the snd_una point. You also need to call this if you invalidate your sack array for any
90  * reason (such as RTO's or MTU changes or some other thing that makes you think all
91  * data is now un-acknowledged). You can also pass in sack_filter_blks(tp, sf, NULL, 0, th_ack) to
92  * advance the cum-ack point. You can use sack_filter_blks_used(sf) to determine if you have filter blocks as
93  * well. So putting these two together, anytime the cum-ack moves forward you probably want to
94  * do:
95  * if (sack_filter_blks_used(sf))
96  *    sack_filter_blks(tp, sf, NULL, 0, th_ack);
97  *
98  * If for some reason you have run the sack-filter and something goes wrong (for example, you can't
99  * allocate space to split your sack-array), you can "undo" the data within the sack filter by calling
100  * sack_filter_reject(sf, in) passing in the list of blocks to be "removed" from the sack-filter.
101  * You can see an example of this use in bbr.c though rack.c has never found it needed.
102  *
103  */
104 
/*
 * Number of filter blocks (splits) the sack filter keeps; it sizes the
 * sf_blks[] array in struct sack_filter.  Per the discussion in the
 * comment above, 15 is the maximum the current filter supports, and going
 * beyond it would require widening sf_bits (or moving to a true bitstring).
 */
105 #define SACK_FILTER_BLOCKS 15
106 
/*
 * State of one sack filter.  The user of the filter allocates this memory
 * and hands it to the sack_filter_*() routines declared in this header.
 */
107 struct sack_filter {
	/*
	 * NOTE(review): presumably the cum-ack point the filter was last
	 * given (via sack_filter_clear() or sack_filter_blks()) -- confirm
	 * in sack_filter.c.
	 */
108 	tcp_seq sf_ack;
	/*
	 * The width of this field is what limits SACK_FILTER_BLOCKS (see the
	 * comment at the top of this header).  NOTE(review): presumably one
	 * bit per sf_blks[] slot -- confirm in sack_filter.c.
	 */
109 	uint16_t sf_bits;
	/*
	 * NOTE(review): meaning is not visible in this header; presumably
	 * an index into sf_blks[] -- confirm in sack_filter.c.
	 */
110 	uint8_t sf_cur;
	/*
	 * Value reported by sack_filter_blks_used(); callers test it to see
	 * whether the filter has any filter blocks.
	 */
111 	uint8_t sf_used;
	/* Storage for the filter blocks, SACK_FILTER_BLOCKS entries. */
112 	struct sackblk sf_blks[SACK_FILTER_BLOCKS];
113 };
114 #ifdef _KERNEL
/*
 * Clear the sack filter (the implementation lives outside this header).
 *
 * Per the usage notes at the top of this header, call this with the
 * snd_una point whenever snd_una == snd_max, and whenever the sack array
 * is invalidated (for example on an RTO or an MTU change).
 *
 * sf  - the sack filter to clear.
 * seq - the snd_una point.  NOTE(review): presumably becomes the filter's
 *       new cum-ack point -- confirm in sack_filter.c.
 */
115 void sack_filter_clear(struct sack_filter *sf, tcp_seq seq);
/*
 * Run the sack filter over a set of SACK blocks (the implementation lives
 * outside this header).  The description below follows the usage notes at
 * the top of this header.
 *
 * tp      - the tcpcb of the connection.
 * sf      - the sack filter in use (memory allocated by the caller).
 * in      - array of sorted, sanity-checked SACK blocks; the filter
 *           reshapes these blocks based on previously processed sacks.
 *           May be NULL (with numblks == 0) to just advance the cum-ack
 *           point.
 * numblks - number of sorted valid blocks in 'in'.
 * th_ack  - the new th_ack (cumulative ack) point.
 *
 * Returns the number of blocks left after filtering; 0 means there is no
 * new sack data to be processed.
 */
116 int sack_filter_blks(struct tcpcb *tp, struct sack_filter *sf, struct sackblk *in, int numblks,
117 		     tcp_seq th_ack);
/*
 * "Undo" data previously run through the sack filter (the implementation
 * lives outside this header).  Per the usage notes at the top of this
 * header, this is for the case where the filter has been run and the
 * caller then fails to act on the result, e.g. it cannot allocate space
 * to split its sack array.
 *
 * sf - the sack filter to remove the data from.
 * in - the block(s) to be removed from the filter.
 *      NOTE(review): no count is passed, so how many entries of 'in' are
 *      read is not visible here -- confirm in sack_filter.c.
 */
118 void sack_filter_reject(struct sack_filter *sf, struct sackblk *in);
/*
 * Report whether the sack filter currently has filter blocks.
 *
 * Typical use, per the notes at the top of this header, is to check this
 * when the cum-ack moves forward and only then call
 * sack_filter_blks(tp, sf, NULL, 0, th_ack).
 *
 * sf - the sack filter to query.  It is only read, so the pointer is
 *      const-qualified; callers holding a non-const pointer are unaffected.
 *
 * Returns the filter's sf_used value; callers treat non-zero as "filter
 * blocks are present".
 */
119 static inline uint8_t sack_filter_blks_used(const struct sack_filter *sf)
120 {
121 	return (sf->sf_used);
122 }
123 
124 #endif
125 #endif
126