xref: /freebsd/sys/netinet/tcp_fastopen.c (revision d0b2dbfa0ecf2bbc9709efc5e20baf8e4b44bbbf)
1 /*-
2  * Copyright (c) 2015-2017 Patrick Kelsey
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 /*
28  * This is an implementation of TCP Fast Open (TFO) [RFC7413]. To include
29  * this code, add the following line to your kernel config:
30  *
31  * options TCP_RFC7413
32  *
33  *
34  * The generated TFO cookies are the 64-bit output of
35  * SipHash24(key=<16-byte-key>, msg=<client-ip>).  Multiple concurrent valid
36  * keys are supported so that time-based rolling cookie invalidation
37  * policies can be implemented in the system.  The default number of
38  * concurrent keys is 2.  This can be adjusted in the kernel config as
39  * follows:
40  *
41  * options TCP_RFC7413_MAX_KEYS=<num-keys>
42  *
43  *
44  * In addition to the facilities defined in RFC7413, this implementation
45  * supports a pre-shared key (PSK) mode of operation in which the TFO server
46  * requires the client to be in possession of a shared secret in order for
47  * the client to be able to successfully open TFO connections with the
48  * server.  This is useful, for example, in environments where TFO servers
49  * are exposed to both internal and external clients and only wish to allow
50  * TFO connections from internal clients.
51  *
52  * In the PSK mode of operation, the server generates and sends TFO cookies
53  * to requesting clients as usual.  However, when validating cookies
54  * received in TFO SYNs from clients, the server requires the
55  * client-supplied cookie to equal SipHash24(key=<16-byte-psk>,
56  * msg=<cookie-sent-to-client>).
57  *
58  * Multiple concurrent valid pre-shared keys are supported so that
59  * time-based rolling PSK invalidation policies can be implemented in the
60  * system.  The default number of concurrent pre-shared keys is 2.  This can
61  * be adjusted in the kernel config as follows:
62  *
63  * options TCP_RFC7413_MAX_PSKS=<num-psks>
64  *
65  *
66  * The following TFO-specific sysctls are defined:
67  *
68  * net.inet.tcp.fastopen.acceptany (RW, default 0)
69  *     When non-zero, all client-supplied TFO cookies will be considered to
70  *     be valid.
71  *
72  * net.inet.tcp.fastopen.autokey (RW, default 120)
73  *     When this and net.inet.tcp.fastopen.server_enable are non-zero, a new
74  *     key will be automatically generated after this many seconds.
75  *
76  * net.inet.tcp.fastopen.ccache_bucket_limit
77  *                     (RWTUN, default TCP_FASTOPEN_CCACHE_BUCKET_LIMIT_DEFAULT)
78  *     The maximum number of entries in a client cookie cache bucket.
79  *
80  * net.inet.tcp.fastopen.ccache_buckets
81  *                          (RDTUN, default TCP_FASTOPEN_CCACHE_BUCKETS_DEFAULT)
82  *     The number of client cookie cache buckets.
83  *
84  * net.inet.tcp.fastopen.ccache_list (RO)
85  *     Print the client cookie cache.
86  *
87  * net.inet.tcp.fastopen.client_enable (RW, default 1)
88  *     When zero, no new active (i.e., client) TFO connections can be
89  *     created.  On the transition from enabled to disabled, the client
90  *     cookie cache is cleared and disabled.  The transition from enabled to
91  *     disabled does not affect any active TFO connections in progress; it
92  *     only prevents new ones from being made.
93  *
94  * net.inet.tcp.fastopen.keylen (RD)
95  *     The key length in bytes.
96  *
97  * net.inet.tcp.fastopen.maxkeys (RD)
98  *     The maximum number of keys supported.
99  *
100  * net.inet.tcp.fastopen.maxpsks (RD)
101  *     The maximum number of pre-shared keys supported.
102  *
103  * net.inet.tcp.fastopen.numkeys (RD)
104  *     The current number of keys installed.
105  *
106  * net.inet.tcp.fastopen.numpsks (RD)
107  *     The current number of pre-shared keys installed.
108  *
109  * net.inet.tcp.fastopen.path_disable_time
110  *                          (RW, default TCP_FASTOPEN_PATH_DISABLE_TIME_DEFAULT)
111  *     When a failure occurs while trying to create a new active (i.e.,
112  *     client) TFO connection, new active connections on the same path, as
113  *     determined by the tuple {client_ip, server_ip, server_port}, will be
114  *     forced to be non-TFO for this many seconds.  Note that the path
115  *     disable mechanism relies on state stored in client cookie cache
116  *     entries, so it is possible for the disable time for a given path to
117  *     be reduced if the corresponding client cookie cache entry is reused
118  *     due to resource pressure before the disable period has elapsed.
119  *
120  * net.inet.tcp.fastopen.psk_enable (RW, default 0)
121  *     When non-zero, pre-shared key (PSK) mode is enabled for all TFO
122  *     servers.  On the transition from enabled to disabled, all installed
123  *     pre-shared keys are removed.
124  *
125  * net.inet.tcp.fastopen.server_enable (RW, default 0)
126  *     When zero, no new passive (i.e., server) TFO connections can be
127  *     created.  On the transition from enabled to disabled, all installed
128  *     keys and pre-shared keys are removed.  On the transition from
129  *     disabled to enabled, if net.inet.tcp.fastopen.autokey is non-zero and
130  *     there are no keys installed, a new key will be generated immediately.
131  *     The transition from enabled to disabled does not affect any passive
132  *     TFO connections in progress; it only prevents new ones from being
133  *     made.
134  *
135  * net.inet.tcp.fastopen.setkey (WR)
136  *     Install a new key by writing net.inet.tcp.fastopen.keylen bytes to
137  *     this sysctl.
138  *
139  * net.inet.tcp.fastopen.setpsk (WR)
140  *     Install a new pre-shared key by writing net.inet.tcp.fastopen.keylen
141  *     bytes to this sysctl.
142  *
143  * In order for TFO connections to be created via a listen socket, that
144  * socket must have the TCP_FASTOPEN socket option set on it.  This option
145  * can be set on the socket either before or after the listen() is invoked.
146  * Clearing this option on a listen socket after it has been set has no
147  * effect on existing TFO connections or TFO connections in progress; it
148  * only prevents new TFO connections from being made.
149  *
150  * For passively-created sockets, the TCP_FASTOPEN socket option can be
151  * queried to determine whether the connection was established using TFO.
152  * Note that connections that are established via a TFO SYN, but that fall
153  * back to using a non-TFO SYN|ACK will have the TCP_FASTOPEN socket option
154  * set.
155  *
156  * Per the RFC, this implementation limits the number of TFO connections
157  * that can be in the SYN_RECEIVED state on a per listen-socket basis.
158  * Whenever this limit is exceeded, requests for new TFO connections are
159  * serviced as non-TFO requests.  Without such a limit, given a valid TFO
160  * cookie, an attacker could keep the listen queue in an overflow condition
161  * using a TFO SYN flood.  This implementation sets the limit at half the
162  * configured listen backlog.
163  *
164  */
165 
166 #include <sys/cdefs.h>
167 #include "opt_inet.h"
168 
169 #include <sys/param.h>
170 #include <sys/jail.h>
171 #include <sys/kernel.h>
172 #include <sys/hash.h>
173 #include <sys/limits.h>
174 #include <sys/lock.h>
175 #include <sys/proc.h>
176 #include <sys/rmlock.h>
177 #include <sys/sbuf.h>
178 #include <sys/socket.h>
179 #include <sys/socketvar.h>
180 #include <sys/sysctl.h>
181 #include <sys/systm.h>
182 
183 #include <crypto/siphash/siphash.h>
184 
185 #include <net/vnet.h>
186 
187 #include <netinet/in.h>
188 #include <netinet/in_pcb.h>
189 #include <netinet/tcp_var.h>
190 #include <netinet/tcp_fastopen.h>
191 
/* TFO keys are used directly as SipHash keys, so SipHash fixes their size. */
#define	TCP_FASTOPEN_KEY_LEN	SIPHASH_KEY_LENGTH

/* PSKs are stored in TCP_FASTOPEN_KEY_LEN-sized slots; the sizes must agree. */
#if TCP_FASTOPEN_PSK_LEN != TCP_FASTOPEN_KEY_LEN
#error TCP_FASTOPEN_PSK_LEN must be equal to TCP_FASTOPEN_KEY_LEN
#endif

/*
 * The client cookie buffer must be large enough to hold a PSK, because a
 * PSK-mode setsockopt() uses tcpcb.t_tfo_cookie.client to hold the PSK
 * until the connect occurs.
 */
#if TCP_FASTOPEN_MAX_COOKIE_LEN < TCP_FASTOPEN_PSK_LEN
#error TCP_FASTOPEN_MAX_COOKIE_LEN must be >= TCP_FASTOPEN_PSK_LEN
#endif

/* Defaults for the client cookie cache geometry. */
#define TCP_FASTOPEN_CCACHE_BUCKET_LIMIT_DEFAULT	16
#define TCP_FASTOPEN_CCACHE_BUCKETS_DEFAULT		2048 /* must be power of 2 */

/* Default for net.inet.tcp.fastopen.path_disable_time. */
#define TCP_FASTOPEN_PATH_DISABLE_TIME_DEFAULT		900 /* seconds */

/*
 * Number of concurrently valid keys: taken from the TCP_RFC7413_MAX_KEYS
 * kernel option when that is defined and at least 1, otherwise 2.
 */
#if !defined(TCP_RFC7413_MAX_KEYS) || (TCP_RFC7413_MAX_KEYS < 1)
#define	TCP_FASTOPEN_MAX_KEYS	2
#else
#define	TCP_FASTOPEN_MAX_KEYS	TCP_RFC7413_MAX_KEYS
#endif

/* Clamp the configured key count to at most 10. */
#if TCP_FASTOPEN_MAX_KEYS > 10
#undef TCP_FASTOPEN_MAX_KEYS
#define	TCP_FASTOPEN_MAX_KEYS	10
#endif

/*
 * Number of concurrently valid pre-shared keys: taken from the
 * TCP_RFC7413_MAX_PSKS kernel option when that is defined and at least 1,
 * otherwise 2.
 */
#if !defined(TCP_RFC7413_MAX_PSKS) || (TCP_RFC7413_MAX_PSKS < 1)
#define	TCP_FASTOPEN_MAX_PSKS	2
#else
#define	TCP_FASTOPEN_MAX_PSKS	TCP_RFC7413_MAX_PSKS
#endif

/* Clamp the configured PSK count to at most 10. */
#if TCP_FASTOPEN_MAX_PSKS > 10
#undef TCP_FASTOPEN_MAX_PSKS
#define	TCP_FASTOPEN_MAX_PSKS	10
#endif
232 
/*
 * Storage for the installed cookie keys and pre-shared keys.  Each array is
 * used as a ring: a newly added entry goes into the slot after 'newest'
 * (wrapping to 0), and lookups walk backwards from 'newest'.
 */
struct tcp_fastopen_keylist {
	unsigned int newest;		/* index in key[] of the latest key */
	unsigned int newest_psk;	/* index in psk[] of the latest PSK */
	uint8_t key[TCP_FASTOPEN_MAX_KEYS][TCP_FASTOPEN_KEY_LEN];
	uint8_t psk[TCP_FASTOPEN_MAX_PSKS][TCP_FASTOPEN_KEY_LEN];
};
239 
/*
 * Context for the automatic key generation callout.  The callout handler
 * receives a pointer to this structure and switches to vnet 'v' before
 * touching any per-vnet TFO state.
 */
struct tcp_fastopen_callout {
	struct callout c;	/* the autokey callout itself */
	struct vnet *v;		/* vnet recorded by tcp_fastopen_init() */
};
244 
/* Forward declarations of the client cookie cache helpers used below. */
static struct tcp_fastopen_ccache_entry *tcp_fastopen_ccache_lookup(
    struct in_conninfo *, struct tcp_fastopen_ccache_bucket **);
static struct tcp_fastopen_ccache_entry *tcp_fastopen_ccache_create(
    struct tcp_fastopen_ccache_bucket *, struct in_conninfo *, uint16_t, uint8_t,
    uint8_t *);
static void tcp_fastopen_ccache_bucket_trim(struct tcp_fastopen_ccache_bucket *,
    unsigned int);
static void tcp_fastopen_ccache_entry_drop(struct tcp_fastopen_ccache_entry *,
    struct tcp_fastopen_ccache_bucket *);
254 
/* Root of the net.inet.tcp.fastopen sysctl tree. */
SYSCTL_NODE(_net_inet_tcp, OID_AUTO, fastopen, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "TCP Fast Open");

/* When non-zero, tcp_fastopen_check_cookie() accepts without validating. */
VNET_DEFINE_STATIC(int, tcp_fastopen_acceptany) = 0;
#define	V_tcp_fastopen_acceptany	VNET(tcp_fastopen_acceptany)
SYSCTL_INT(_net_inet_tcp_fastopen, OID_AUTO, acceptany,
    CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_fastopen_acceptany), 0,
    "Accept any non-empty cookie");

/* Automatic key generation interval in seconds; changed via the handler. */
VNET_DEFINE_STATIC(unsigned int, tcp_fastopen_autokey) = 120;
#define	V_tcp_fastopen_autokey	VNET(tcp_fastopen_autokey)
static int sysctl_net_inet_tcp_fastopen_autokey(SYSCTL_HANDLER_ARGS);
SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, autokey,
    CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    NULL, 0, &sysctl_net_inet_tcp_fastopen_autokey, "IU",
    "Number of seconds between auto-generation of a new key; zero disables");

/*
 * No dedicated variable here: the handler reads and writes
 * V_tcp_fastopen_ccache.bucket_limit.
 */
static int sysctl_net_inet_tcp_fastopen_ccache_bucket_limit(SYSCTL_HANDLER_ARGS);
SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, ccache_bucket_limit,
    CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_NEEDGIANT,
    NULL, 0, &sysctl_net_inet_tcp_fastopen_ccache_bucket_limit, "IU",
    "Max entries per bucket in client cookie cache");

/*
 * Requested bucket count (read-only tunable).  tcp_fastopen_init() falls
 * back to the default when this is zero or not a power of 2.
 */
VNET_DEFINE_STATIC(unsigned int, tcp_fastopen_ccache_buckets) =
    TCP_FASTOPEN_CCACHE_BUCKETS_DEFAULT;
#define	V_tcp_fastopen_ccache_buckets VNET(tcp_fastopen_ccache_buckets)
SYSCTL_UINT(_net_inet_tcp_fastopen, OID_AUTO, ccache_buckets,
    CTLFLAG_VNET | CTLFLAG_RDTUN, &VNET_NAME(tcp_fastopen_ccache_buckets), 0,
    "Client cookie cache number of buckets (power of 2)");

/* Client-side TFO switch; initialized to enabled (1). */
VNET_DEFINE(unsigned int, tcp_fastopen_client_enable) = 1;
static int sysctl_net_inet_tcp_fastopen_client_enable(SYSCTL_HANDLER_ARGS);
SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, client_enable,
    CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
    NULL, 0, &sysctl_net_inet_tcp_fastopen_client_enable, "IU",
    "Enable/disable TCP Fast Open client functionality");
291 
/* Read-only constants exported for userland: compile-time sizes and limits. */
SYSCTL_INT(_net_inet_tcp_fastopen, OID_AUTO, keylen,
    CTLFLAG_RD, SYSCTL_NULL_INT_PTR, TCP_FASTOPEN_KEY_LEN,
    "Key length in bytes");

SYSCTL_INT(_net_inet_tcp_fastopen, OID_AUTO, maxkeys,
    CTLFLAG_RD, SYSCTL_NULL_INT_PTR, TCP_FASTOPEN_MAX_KEYS,
    "Maximum number of keys supported");

SYSCTL_INT(_net_inet_tcp_fastopen, OID_AUTO, maxpsks,
    CTLFLAG_RD, SYSCTL_NULL_INT_PTR, TCP_FASTOPEN_MAX_PSKS,
    "Maximum number of pre-shared keys supported");

/* Count of valid entries in V_tcp_fastopen_keys.key[]. */
VNET_DEFINE_STATIC(unsigned int, tcp_fastopen_numkeys) = 0;
#define	V_tcp_fastopen_numkeys	VNET(tcp_fastopen_numkeys)
SYSCTL_UINT(_net_inet_tcp_fastopen, OID_AUTO, numkeys,
    CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(tcp_fastopen_numkeys), 0,
    "Number of keys installed");

/* Count of valid entries in V_tcp_fastopen_keys.psk[]. */
VNET_DEFINE_STATIC(unsigned int, tcp_fastopen_numpsks) = 0;
#define	V_tcp_fastopen_numpsks	VNET(tcp_fastopen_numpsks)
SYSCTL_UINT(_net_inet_tcp_fastopen, OID_AUTO, numpsks,
    CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(tcp_fastopen_numpsks), 0,
    "Number of pre-shared keys installed");
315 
/* Seconds a client path stays non-TFO after a TFO failure. */
VNET_DEFINE_STATIC(unsigned int, tcp_fastopen_path_disable_time) =
    TCP_FASTOPEN_PATH_DISABLE_TIME_DEFAULT;
#define	V_tcp_fastopen_path_disable_time VNET(tcp_fastopen_path_disable_time)
SYSCTL_UINT(_net_inet_tcp_fastopen, OID_AUTO, path_disable_time,
    CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_fastopen_path_disable_time), 0,
    "Seconds a TFO failure disables a {client_ip, server_ip, server_port} path");

/* Server PSK mode switch; the handler drops all PSKs when it is turned off. */
VNET_DEFINE_STATIC(unsigned int, tcp_fastopen_psk_enable) = 0;
#define	V_tcp_fastopen_psk_enable	VNET(tcp_fastopen_psk_enable)
static int sysctl_net_inet_tcp_fastopen_psk_enable(SYSCTL_HANDLER_ARGS);
SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, psk_enable,
    CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    NULL, 0, &sysctl_net_inet_tcp_fastopen_psk_enable, "IU",
    "Enable/disable TCP Fast Open server pre-shared key mode");

/* Server-side TFO switch; the handler drops all keys and PSKs on disable. */
VNET_DEFINE(unsigned int, tcp_fastopen_server_enable) = 0;
static int sysctl_net_inet_tcp_fastopen_server_enable(SYSCTL_HANDLER_ARGS);
SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, server_enable,
    CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    NULL, 0, &sysctl_net_inet_tcp_fastopen_server_enable, "IU",
    "Enable/disable TCP Fast Open server functionality");

/* Write-only: install a key of exactly TCP_FASTOPEN_KEY_LEN bytes. */
static int sysctl_net_inet_tcp_fastopen_setkey(SYSCTL_HANDLER_ARGS);
SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, setkey,
    CTLFLAG_VNET | CTLTYPE_OPAQUE | CTLFLAG_WR | CTLFLAG_MPSAFE,
    NULL, 0, &sysctl_net_inet_tcp_fastopen_setkey, "",
    "Install a new key");

/* Write-only: install a pre-shared key of TCP_FASTOPEN_KEY_LEN bytes. */
static int sysctl_net_inet_tcp_fastopen_setpsk(SYSCTL_HANDLER_ARGS);
SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, setpsk,
    CTLFLAG_VNET | CTLTYPE_OPAQUE | CTLFLAG_WR | CTLFLAG_MPSAFE,
    NULL, 0, &sysctl_net_inet_tcp_fastopen_setpsk, "",
    "Install a new pre-shared key");

/* Read-only textual dump of the client cookie cache. */
static int sysctl_net_inet_tcp_fastopen_ccache_list(SYSCTL_HANDLER_ARGS);
SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, ccache_list,
    CTLFLAG_VNET | CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_SKIP | CTLFLAG_MPSAFE,
    NULL, 0, sysctl_net_inet_tcp_fastopen_ccache_list, "A",
    "List of all client cookie cache entries");
355 
/*
 * Lock covering the key/PSK list and the related counters and enables.
 * Cookie validation takes it shared; the sysctl handlers that change keys
 * take it exclusively.  The autokey callout is also initialized on this
 * lock (see callout_init_rm() in tcp_fastopen_init()).
 */
VNET_DEFINE_STATIC(struct rmlock, tcp_fastopen_keylock);
#define	V_tcp_fastopen_keylock	VNET(tcp_fastopen_keylock)

/* 't' is the caller-provided struct rm_priotracker for the read section. */
#define TCP_FASTOPEN_KEYS_RLOCK(t)	rm_rlock(&V_tcp_fastopen_keylock, (t))
#define TCP_FASTOPEN_KEYS_RUNLOCK(t)	rm_runlock(&V_tcp_fastopen_keylock, (t))
#define TCP_FASTOPEN_KEYS_WLOCK()	rm_wlock(&V_tcp_fastopen_keylock)
#define TCP_FASTOPEN_KEYS_WUNLOCK()	rm_wunlock(&V_tcp_fastopen_keylock)

/* Installed keys and pre-shared keys. */
VNET_DEFINE_STATIC(struct tcp_fastopen_keylist, tcp_fastopen_keys);
#define V_tcp_fastopen_keys	VNET(tcp_fastopen_keys)

/* Callout state for periodic automatic key generation. */
VNET_DEFINE_STATIC(struct tcp_fastopen_callout, tcp_fastopen_autokey_ctx);
#define V_tcp_fastopen_autokey_ctx	VNET(tcp_fastopen_autokey_ctx)

/* UMA zone backing the counters from tcp_fastopen_alloc_counter(). */
VNET_DEFINE_STATIC(uma_zone_t, counter_zone);
#define	V_counter_zone			VNET(counter_zone)

/* malloc(9) type used for the client cookie cache bucket array. */
static MALLOC_DEFINE(M_TCP_FASTOPEN_CCACHE, "tfo_ccache", "TFO client cookie cache buckets");

/* The client cookie cache itself. */
VNET_DEFINE_STATIC(struct tcp_fastopen_ccache, tcp_fastopen_ccache);
#define V_tcp_fastopen_ccache	VNET(tcp_fastopen_ccache)

/* Per-bucket mutex helpers for the client cookie cache. */
#define	CCB_LOCK(ccb)		mtx_lock(&(ccb)->ccb_mtx)
#define	CCB_UNLOCK(ccb)		mtx_unlock(&(ccb)->ccb_mtx)
#define	CCB_LOCK_ASSERT(ccb)	mtx_assert(&(ccb)->ccb_mtx, MA_OWNED)
381 
382 void
383 tcp_fastopen_init(void)
384 {
385 	unsigned int i;
386 
387 	V_counter_zone = uma_zcreate("tfo", sizeof(unsigned int),
388 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
389 	rm_init(&V_tcp_fastopen_keylock, "tfo_keylock");
390 	callout_init_rm(&V_tcp_fastopen_autokey_ctx.c,
391 	    &V_tcp_fastopen_keylock, 0);
392 	V_tcp_fastopen_autokey_ctx.v = curvnet;
393 	V_tcp_fastopen_keys.newest = TCP_FASTOPEN_MAX_KEYS - 1;
394 	V_tcp_fastopen_keys.newest_psk = TCP_FASTOPEN_MAX_PSKS - 1;
395 
396 	/* May already be non-zero if kernel tunable was set */
397 	if (V_tcp_fastopen_ccache.bucket_limit == 0)
398 		V_tcp_fastopen_ccache.bucket_limit =
399 		    TCP_FASTOPEN_CCACHE_BUCKET_LIMIT_DEFAULT;
400 
401 	/* May already be non-zero if kernel tunable was set */
402 	if ((V_tcp_fastopen_ccache_buckets == 0) ||
403 	    !powerof2(V_tcp_fastopen_ccache_buckets))
404 		V_tcp_fastopen_ccache.buckets =
405 			TCP_FASTOPEN_CCACHE_BUCKETS_DEFAULT;
406 	else
407 		V_tcp_fastopen_ccache.buckets = V_tcp_fastopen_ccache_buckets;
408 
409 	V_tcp_fastopen_ccache.mask = V_tcp_fastopen_ccache.buckets - 1;
410 	V_tcp_fastopen_ccache.secret = arc4random();
411 
412 	V_tcp_fastopen_ccache.base = malloc(V_tcp_fastopen_ccache.buckets *
413 	    sizeof(struct tcp_fastopen_ccache_bucket), M_TCP_FASTOPEN_CCACHE,
414 	    M_WAITOK | M_ZERO);
415 
416 	for (i = 0; i < V_tcp_fastopen_ccache.buckets; i++) {
417 		TAILQ_INIT(&V_tcp_fastopen_ccache.base[i].ccb_entries);
418 		mtx_init(&V_tcp_fastopen_ccache.base[i].ccb_mtx, "tfo_ccache_bucket",
419 			 NULL, MTX_DEF);
420 		if (V_tcp_fastopen_client_enable) {
421 			/* enable bucket */
422 			V_tcp_fastopen_ccache.base[i].ccb_num_entries = 0;
423 		} else {
424 			/* disable bucket */
425 			V_tcp_fastopen_ccache.base[i].ccb_num_entries = -1;
426 		}
427 		V_tcp_fastopen_ccache.base[i].ccb_ccache = &V_tcp_fastopen_ccache;
428 	}
429 
430 	/*
431 	 * Note that while the total number of entries in the cookie cache
432 	 * is limited by the table management logic to
433 	 * V_tcp_fastopen_ccache.buckets *
434 	 * V_tcp_fastopen_ccache.bucket_limit, the total number of items in
435 	 * this zone can exceed that amount by the number of CPUs in the
436 	 * system times the maximum number of unallocated items that can be
437 	 * present in each UMA per-CPU cache for this zone.
438 	 */
439 	V_tcp_fastopen_ccache.zone = uma_zcreate("tfo_ccache_entries",
440 	    sizeof(struct tcp_fastopen_ccache_entry), NULL, NULL, NULL, NULL,
441 	    UMA_ALIGN_CACHE, 0);
442 }
443 
444 void
445 tcp_fastopen_destroy(void)
446 {
447 	struct tcp_fastopen_ccache_bucket *ccb;
448 	unsigned int i;
449 
450 	for (i = 0; i < V_tcp_fastopen_ccache.buckets; i++) {
451 		ccb = &V_tcp_fastopen_ccache.base[i];
452 		tcp_fastopen_ccache_bucket_trim(ccb, 0);
453 		mtx_destroy(&ccb->ccb_mtx);
454 	}
455 
456 	KASSERT(uma_zone_get_cur(V_tcp_fastopen_ccache.zone) == 0,
457 	    ("%s: TFO ccache zone allocation count not 0", __func__));
458 	uma_zdestroy(V_tcp_fastopen_ccache.zone);
459 	free(V_tcp_fastopen_ccache.base, M_TCP_FASTOPEN_CCACHE);
460 
461 	callout_drain(&V_tcp_fastopen_autokey_ctx.c);
462 	rm_destroy(&V_tcp_fastopen_keylock);
463 	uma_zdestroy(V_counter_zone);
464 }
465 
466 unsigned int *
467 tcp_fastopen_alloc_counter(void)
468 {
469 	unsigned int *counter;
470 	counter = uma_zalloc(V_counter_zone, M_NOWAIT);
471 	if (counter)
472 		*counter = 1;
473 	return (counter);
474 }
475 
476 void
477 tcp_fastopen_decrement_counter(unsigned int *counter)
478 {
479 	if (*counter == 1)
480 		uma_zfree(V_counter_zone, counter);
481 	else
482 		atomic_subtract_int(counter, 1);
483 }
484 
485 static void
486 tcp_fastopen_addkey_locked(uint8_t *key)
487 {
488 
489 	V_tcp_fastopen_keys.newest++;
490 	if (V_tcp_fastopen_keys.newest == TCP_FASTOPEN_MAX_KEYS)
491 		V_tcp_fastopen_keys.newest = 0;
492 	memcpy(V_tcp_fastopen_keys.key[V_tcp_fastopen_keys.newest], key,
493 	    TCP_FASTOPEN_KEY_LEN);
494 	if (V_tcp_fastopen_numkeys < TCP_FASTOPEN_MAX_KEYS)
495 		V_tcp_fastopen_numkeys++;
496 }
497 
498 static void
499 tcp_fastopen_addpsk_locked(uint8_t *psk)
500 {
501 
502 	V_tcp_fastopen_keys.newest_psk++;
503 	if (V_tcp_fastopen_keys.newest_psk == TCP_FASTOPEN_MAX_PSKS)
504 		V_tcp_fastopen_keys.newest_psk = 0;
505 	memcpy(V_tcp_fastopen_keys.psk[V_tcp_fastopen_keys.newest_psk], psk,
506 	    TCP_FASTOPEN_KEY_LEN);
507 	if (V_tcp_fastopen_numpsks < TCP_FASTOPEN_MAX_PSKS)
508 		V_tcp_fastopen_numpsks++;
509 }
510 
511 static void
512 tcp_fastopen_autokey_locked(void)
513 {
514 	uint8_t newkey[TCP_FASTOPEN_KEY_LEN];
515 
516 	arc4rand(newkey, TCP_FASTOPEN_KEY_LEN, 0);
517 	tcp_fastopen_addkey_locked(newkey);
518 }
519 
520 static void
521 tcp_fastopen_autokey_callout(void *arg)
522 {
523 	struct tcp_fastopen_callout *ctx = arg;
524 
525 	CURVNET_SET(ctx->v);
526 	tcp_fastopen_autokey_locked();
527 	callout_reset(&ctx->c, V_tcp_fastopen_autokey * hz,
528 		      tcp_fastopen_autokey_callout, ctx);
529 	CURVNET_RESTORE();
530 }
531 
532 static uint64_t
533 tcp_fastopen_make_cookie(uint8_t key[SIPHASH_KEY_LENGTH], struct in_conninfo *inc)
534 {
535 	SIPHASH_CTX ctx;
536 	uint64_t siphash;
537 
538 	SipHash24_Init(&ctx);
539 	SipHash_SetKey(&ctx, key);
540 	switch (inc->inc_flags & INC_ISIPV6) {
541 #ifdef INET
542 	case 0:
543 		SipHash_Update(&ctx, &inc->inc_faddr, sizeof(inc->inc_faddr));
544 		break;
545 #endif
546 #ifdef INET6
547 	case INC_ISIPV6:
548 		SipHash_Update(&ctx, &inc->inc6_faddr, sizeof(inc->inc6_faddr));
549 		break;
550 #endif
551 	}
552 	SipHash_Final((u_int8_t *)&siphash, &ctx);
553 
554 	return (siphash);
555 }
556 
557 static uint64_t
558 tcp_fastopen_make_psk_cookie(uint8_t *psk, uint8_t *cookie, uint8_t cookie_len)
559 {
560 	SIPHASH_CTX ctx;
561 	uint64_t psk_cookie;
562 
563 	SipHash24_Init(&ctx);
564 	SipHash_SetKey(&ctx, psk);
565 	SipHash_Update(&ctx, cookie, cookie_len);
566 	SipHash_Final((u_int8_t *)&psk_cookie, &ctx);
567 
568 	return (psk_cookie);
569 }
570 
571 static int
572 tcp_fastopen_find_cookie_match_locked(uint8_t *wire_cookie, uint64_t *cur_cookie)
573 {
574 	unsigned int i, psk_index;
575 	uint64_t psk_cookie;
576 
577 	if (V_tcp_fastopen_psk_enable) {
578 		psk_index = V_tcp_fastopen_keys.newest_psk;
579 		for (i = 0; i < V_tcp_fastopen_numpsks; i++) {
580 			psk_cookie =
581 			    tcp_fastopen_make_psk_cookie(
582 				 V_tcp_fastopen_keys.psk[psk_index],
583 				 (uint8_t *)cur_cookie,
584 				 TCP_FASTOPEN_COOKIE_LEN);
585 
586 			if (memcmp(wire_cookie, &psk_cookie,
587 				   TCP_FASTOPEN_COOKIE_LEN) == 0)
588 				return (1);
589 
590 			if (psk_index == 0)
591 				psk_index = TCP_FASTOPEN_MAX_PSKS - 1;
592 			else
593 				psk_index--;
594 		}
595 	} else if (memcmp(wire_cookie, cur_cookie, TCP_FASTOPEN_COOKIE_LEN) == 0)
596 		return (1);
597 
598 	return (0);
599 }
600 
601 /*
602  * Return values:
603  *	-1	the cookie is invalid and no valid cookie is available
604  *	 0	the cookie is invalid and the latest cookie has been returned
605  *	 1	the cookie is valid and the latest cookie has been returned
606  */
607 int
608 tcp_fastopen_check_cookie(struct in_conninfo *inc, uint8_t *cookie,
609     unsigned int len, uint64_t *latest_cookie)
610 {
611 	struct rm_priotracker tracker;
612 	unsigned int i, key_index;
613 	int rv;
614 	uint64_t cur_cookie;
615 
616 	if (V_tcp_fastopen_acceptany) {
617 		*latest_cookie = 0;
618 		return (1);
619 	}
620 
621 	TCP_FASTOPEN_KEYS_RLOCK(&tracker);
622 	if (len != TCP_FASTOPEN_COOKIE_LEN) {
623 		if (V_tcp_fastopen_numkeys > 0) {
624 			*latest_cookie =
625 			    tcp_fastopen_make_cookie(
626 				V_tcp_fastopen_keys.key[V_tcp_fastopen_keys.newest],
627 				inc);
628 			rv = 0;
629 		} else
630 			rv = -1;
631 		goto out;
632 	}
633 
634 	/*
635 	 * Check against each available key, from newest to oldest.
636 	 */
637 	key_index = V_tcp_fastopen_keys.newest;
638 	for (i = 0; i < V_tcp_fastopen_numkeys; i++) {
639 		cur_cookie =
640 		    tcp_fastopen_make_cookie(V_tcp_fastopen_keys.key[key_index],
641 			inc);
642 		if (i == 0)
643 			*latest_cookie = cur_cookie;
644 		rv = tcp_fastopen_find_cookie_match_locked(cookie, &cur_cookie);
645 		if (rv)
646 			goto out;
647 		if (key_index == 0)
648 			key_index = TCP_FASTOPEN_MAX_KEYS - 1;
649 		else
650 			key_index--;
651 	}
652 	rv = 0;
653 
654  out:
655 	TCP_FASTOPEN_KEYS_RUNLOCK(&tracker);
656 	return (rv);
657 }
658 
659 static int
660 sysctl_net_inet_tcp_fastopen_autokey(SYSCTL_HANDLER_ARGS)
661 {
662 	int error;
663 	unsigned int new;
664 
665 	new = V_tcp_fastopen_autokey;
666 	error = sysctl_handle_int(oidp, &new, 0, req);
667 	if (error == 0 && req->newptr) {
668 		if (new > (INT_MAX / hz))
669 			return (EINVAL);
670 
671 		TCP_FASTOPEN_KEYS_WLOCK();
672 		if (V_tcp_fastopen_server_enable) {
673 			if (V_tcp_fastopen_autokey && !new)
674 				callout_stop(&V_tcp_fastopen_autokey_ctx.c);
675 			else if (new)
676 				callout_reset(&V_tcp_fastopen_autokey_ctx.c,
677 				    new * hz, tcp_fastopen_autokey_callout,
678 				    &V_tcp_fastopen_autokey_ctx);
679 		}
680 		V_tcp_fastopen_autokey = new;
681 		TCP_FASTOPEN_KEYS_WUNLOCK();
682 	}
683 
684 	return (error);
685 }
686 
687 static int
688 sysctl_net_inet_tcp_fastopen_psk_enable(SYSCTL_HANDLER_ARGS)
689 {
690 	int error;
691 	unsigned int new;
692 
693 	new = V_tcp_fastopen_psk_enable;
694 	error = sysctl_handle_int(oidp, &new, 0, req);
695 	if (error == 0 && req->newptr) {
696 		if (V_tcp_fastopen_psk_enable && !new) {
697 			/* enabled -> disabled */
698 			TCP_FASTOPEN_KEYS_WLOCK();
699 			V_tcp_fastopen_numpsks = 0;
700 			V_tcp_fastopen_keys.newest_psk =
701 			    TCP_FASTOPEN_MAX_PSKS - 1;
702 			V_tcp_fastopen_psk_enable = 0;
703 			TCP_FASTOPEN_KEYS_WUNLOCK();
704 		} else if (!V_tcp_fastopen_psk_enable && new) {
705 			/* disabled -> enabled */
706 			TCP_FASTOPEN_KEYS_WLOCK();
707 			V_tcp_fastopen_psk_enable = 1;
708 			TCP_FASTOPEN_KEYS_WUNLOCK();
709 		}
710 	}
711 	return (error);
712 }
713 
714 static int
715 sysctl_net_inet_tcp_fastopen_server_enable(SYSCTL_HANDLER_ARGS)
716 {
717 	int error;
718 	unsigned int new;
719 
720 	new = V_tcp_fastopen_server_enable;
721 	error = sysctl_handle_int(oidp, &new, 0, req);
722 	if (error == 0 && req->newptr) {
723 		if (V_tcp_fastopen_server_enable && !new) {
724 			/* enabled -> disabled */
725 			TCP_FASTOPEN_KEYS_WLOCK();
726 			V_tcp_fastopen_numkeys = 0;
727 			V_tcp_fastopen_keys.newest = TCP_FASTOPEN_MAX_KEYS - 1;
728 			if (V_tcp_fastopen_autokey)
729 				callout_stop(&V_tcp_fastopen_autokey_ctx.c);
730 			V_tcp_fastopen_numpsks = 0;
731 			V_tcp_fastopen_keys.newest_psk =
732 			    TCP_FASTOPEN_MAX_PSKS - 1;
733 			V_tcp_fastopen_server_enable = 0;
734 			TCP_FASTOPEN_KEYS_WUNLOCK();
735 		} else if (!V_tcp_fastopen_server_enable && new) {
736 			/* disabled -> enabled */
737 			TCP_FASTOPEN_KEYS_WLOCK();
738 			if (V_tcp_fastopen_autokey &&
739 			    (V_tcp_fastopen_numkeys == 0)) {
740 				tcp_fastopen_autokey_locked();
741 				callout_reset(&V_tcp_fastopen_autokey_ctx.c,
742 				    V_tcp_fastopen_autokey * hz,
743 				    tcp_fastopen_autokey_callout,
744 				    &V_tcp_fastopen_autokey_ctx);
745 			}
746 			V_tcp_fastopen_server_enable = 1;
747 			TCP_FASTOPEN_KEYS_WUNLOCK();
748 		}
749 	}
750 	return (error);
751 }
752 
753 static int
754 sysctl_net_inet_tcp_fastopen_setkey(SYSCTL_HANDLER_ARGS)
755 {
756 	int error;
757 	uint8_t newkey[TCP_FASTOPEN_KEY_LEN];
758 
759 	if (req->oldptr != NULL || req->oldlen != 0)
760 		return (EINVAL);
761 	if (req->newptr == NULL)
762 		return (EPERM);
763 	if (req->newlen != sizeof(newkey))
764 		return (EINVAL);
765 	error = SYSCTL_IN(req, newkey, sizeof(newkey));
766 	if (error)
767 		return (error);
768 
769 	TCP_FASTOPEN_KEYS_WLOCK();
770 	tcp_fastopen_addkey_locked(newkey);
771 	TCP_FASTOPEN_KEYS_WUNLOCK();
772 
773 	return (0);
774 }
775 
776 static int
777 sysctl_net_inet_tcp_fastopen_setpsk(SYSCTL_HANDLER_ARGS)
778 {
779 	int error;
780 	uint8_t newpsk[TCP_FASTOPEN_KEY_LEN];
781 
782 	if (req->oldptr != NULL || req->oldlen != 0)
783 		return (EINVAL);
784 	if (req->newptr == NULL)
785 		return (EPERM);
786 	if (req->newlen != sizeof(newpsk))
787 		return (EINVAL);
788 	error = SYSCTL_IN(req, newpsk, sizeof(newpsk));
789 	if (error)
790 		return (error);
791 
792 	TCP_FASTOPEN_KEYS_WLOCK();
793 	tcp_fastopen_addpsk_locked(newpsk);
794 	TCP_FASTOPEN_KEYS_WUNLOCK();
795 
796 	return (0);
797 }
798 
799 static int
800 sysctl_net_inet_tcp_fastopen_ccache_bucket_limit(SYSCTL_HANDLER_ARGS)
801 {
802 	struct tcp_fastopen_ccache_bucket *ccb;
803 	int error;
804 	unsigned int new;
805 	unsigned int i;
806 
807 	new = V_tcp_fastopen_ccache.bucket_limit;
808 	error = sysctl_handle_int(oidp, &new, 0, req);
809 	if (error == 0 && req->newptr) {
810 		if ((new == 0) || (new > INT_MAX))
811 			error = EINVAL;
812 		else {
813 			if (new < V_tcp_fastopen_ccache.bucket_limit) {
814 				for (i = 0; i < V_tcp_fastopen_ccache.buckets;
815 				     i++) {
816 					ccb = &V_tcp_fastopen_ccache.base[i];
817 					tcp_fastopen_ccache_bucket_trim(ccb, new);
818 				}
819 			}
820 			V_tcp_fastopen_ccache.bucket_limit = new;
821 		}
822 	}
823 	return (error);
824 }
825 
826 static int
827 sysctl_net_inet_tcp_fastopen_client_enable(SYSCTL_HANDLER_ARGS)
828 {
829 	struct tcp_fastopen_ccache_bucket *ccb;
830 	int error;
831 	unsigned int new, i;
832 
833 	new = V_tcp_fastopen_client_enable;
834 	error = sysctl_handle_int(oidp, &new, 0, req);
835 	if (error == 0 && req->newptr) {
836 		if (V_tcp_fastopen_client_enable && !new) {
837 			/* enabled -> disabled */
838 			for (i = 0; i < V_tcp_fastopen_ccache.buckets; i++) {
839 				ccb = &V_tcp_fastopen_ccache.base[i];
840 				KASSERT(ccb->ccb_num_entries > -1,
841 				    ("%s: ccb->ccb_num_entries %d is negative",
842 					__func__, ccb->ccb_num_entries));
843 				tcp_fastopen_ccache_bucket_trim(ccb, 0);
844 			}
845 			V_tcp_fastopen_client_enable = 0;
846 		} else if (!V_tcp_fastopen_client_enable && new) {
847 			/* disabled -> enabled */
848 			for (i = 0; i < V_tcp_fastopen_ccache.buckets; i++) {
849 				ccb = &V_tcp_fastopen_ccache.base[i];
850 				CCB_LOCK(ccb);
851 				KASSERT(TAILQ_EMPTY(&ccb->ccb_entries),
852 				    ("%s: ccb->ccb_entries not empty", __func__));
853 				KASSERT(ccb->ccb_num_entries == -1,
854 				    ("%s: ccb->ccb_num_entries %d not -1", __func__,
855 					ccb->ccb_num_entries));
856 				ccb->ccb_num_entries = 0; /* enable bucket */
857 				CCB_UNLOCK(ccb);
858 			}
859 			V_tcp_fastopen_client_enable = 1;
860 		}
861 	}
862 	return (error);
863 }
864 
865 void
866 tcp_fastopen_connect(struct tcpcb *tp)
867 {
868 	struct inpcb *inp = tptoinpcb(tp);
869 	struct tcp_fastopen_ccache_bucket *ccb;
870 	struct tcp_fastopen_ccache_entry *cce;
871 	sbintime_t now;
872 	uint16_t server_mss;
873 	uint64_t psk_cookie;
874 
875 	psk_cookie = 0;
876 	cce = tcp_fastopen_ccache_lookup(&inp->inp_inc, &ccb);
877 	if (cce) {
878 		if (cce->disable_time == 0) {
879 			if ((cce->cookie_len > 0) &&
880 			    (tp->t_tfo_client_cookie_len ==
881 			     TCP_FASTOPEN_PSK_LEN)) {
882 				psk_cookie =
883 				    tcp_fastopen_make_psk_cookie(
884 					tp->t_tfo_cookie.client,
885 					cce->cookie, cce->cookie_len);
886 			} else {
887 				tp->t_tfo_client_cookie_len = cce->cookie_len;
888 				memcpy(tp->t_tfo_cookie.client, cce->cookie,
889 				    cce->cookie_len);
890 			}
891 			server_mss = cce->server_mss;
892 			CCB_UNLOCK(ccb);
893 			if (tp->t_tfo_client_cookie_len ==
894 			    TCP_FASTOPEN_PSK_LEN && psk_cookie) {
895 				tp->t_tfo_client_cookie_len =
896 				    TCP_FASTOPEN_COOKIE_LEN;
897 				memcpy(tp->t_tfo_cookie.client, &psk_cookie,
898 				    TCP_FASTOPEN_COOKIE_LEN);
899 			}
900 			tcp_mss(tp, server_mss ? server_mss : -1);
901 			tp->snd_wnd = tp->t_maxseg;
902 		} else {
903 			/*
904 			 * The path is disabled.  Check the time and
905 			 * possibly re-enable.
906 			 */
907 			now = getsbinuptime();
908 			if (now - cce->disable_time >
909 			    ((sbintime_t)V_tcp_fastopen_path_disable_time << 32)) {
910 				/*
911 				 * Re-enable path.  Force a TFO cookie
912 				 * request.  Forget the old MSS as it may be
913 				 * bogus now, and we will rediscover it in
914 				 * the SYN|ACK.
915 				 */
916 				cce->disable_time = 0;
917 				cce->server_mss = 0;
918 				cce->cookie_len = 0;
919 				/*
920 				 * tp->t_tfo... cookie details are already
921 				 * zero from the tcpcb init.
922 				 */
923 			} else {
924 				/*
925 				 * Path is disabled, so disable TFO on this
926 				 * connection.
927 				 */
928 				tp->t_flags &= ~TF_FASTOPEN;
929 			}
930 			CCB_UNLOCK(ccb);
931 			tcp_mss(tp, -1);
932 			/*
933 			 * snd_wnd is irrelevant since we are either forcing
934 			 * a TFO cookie request or disabling TFO - either
935 			 * way, no data with the SYN.
936 			 */
937 		}
938 	} else {
939 		/*
940 		 * A new entry for this path will be created when a SYN|ACK
941 		 * comes back, or the attempt otherwise fails.
942 		 */
943 		CCB_UNLOCK(ccb);
944 		tcp_mss(tp, -1);
945 		/*
946 		 * snd_wnd is irrelevant since we are forcing a TFO cookie
947 		 * request.
948 		 */
949 	}
950 }
951 
952 void
953 tcp_fastopen_disable_path(struct tcpcb *tp)
954 {
955 	struct in_conninfo *inc = &tptoinpcb(tp)->inp_inc;
956 	struct tcp_fastopen_ccache_bucket *ccb;
957 	struct tcp_fastopen_ccache_entry *cce;
958 
959 	cce = tcp_fastopen_ccache_lookup(inc, &ccb);
960 	if (cce) {
961 		cce->server_mss = 0;
962 		cce->cookie_len = 0;
963 		/*
964 		 * Preserve the existing disable time if it is already
965 		 * disabled.
966 		 */
967 		if (cce->disable_time == 0)
968 			cce->disable_time = getsbinuptime();
969 	} else /* use invalid cookie len to create disabled entry */
970 		tcp_fastopen_ccache_create(ccb, inc, 0,
971 	   	    TCP_FASTOPEN_MAX_COOKIE_LEN + 1, NULL);
972 
973 	CCB_UNLOCK(ccb);
974 	tp->t_flags &= ~TF_FASTOPEN;
975 }
976 
977 void
978 tcp_fastopen_update_cache(struct tcpcb *tp, uint16_t mss,
979     uint8_t cookie_len, uint8_t *cookie)
980 {
981 	struct in_conninfo *inc = &tptoinpcb(tp)->inp_inc;
982 	struct tcp_fastopen_ccache_bucket *ccb;
983 	struct tcp_fastopen_ccache_entry *cce;
984 
985 	cce = tcp_fastopen_ccache_lookup(inc, &ccb);
986 	if (cce) {
987 		if ((cookie_len >= TCP_FASTOPEN_MIN_COOKIE_LEN) &&
988 		    (cookie_len <= TCP_FASTOPEN_MAX_COOKIE_LEN) &&
989 		    ((cookie_len & 0x1) == 0)) {
990 			cce->server_mss = mss;
991 			cce->cookie_len = cookie_len;
992 			memcpy(cce->cookie, cookie, cookie_len);
993 			cce->disable_time = 0;
994 		} else {
995 			/* invalid cookie length, disable entry */
996 			cce->server_mss = 0;
997 			cce->cookie_len = 0;
998 			/*
999 			 * Preserve the existing disable time if it is
1000 			 * already disabled.
1001 			 */
1002 			if (cce->disable_time == 0)
1003 				cce->disable_time = getsbinuptime();
1004 		}
1005 	} else
1006 		tcp_fastopen_ccache_create(ccb, inc, mss, cookie_len, cookie);
1007 
1008 	CCB_UNLOCK(ccb);
1009 }
1010 
1011 static struct tcp_fastopen_ccache_entry *
1012 tcp_fastopen_ccache_lookup(struct in_conninfo *inc,
1013     struct tcp_fastopen_ccache_bucket **ccbp)
1014 {
1015 	struct tcp_fastopen_ccache_bucket *ccb;
1016 	struct tcp_fastopen_ccache_entry *cce;
1017 	uint32_t last_word;
1018 	uint32_t hash;
1019 
1020 	hash = jenkins_hash32((uint32_t *)&inc->inc_ie.ie_dependladdr, 4,
1021 	    V_tcp_fastopen_ccache.secret);
1022 	hash = jenkins_hash32((uint32_t *)&inc->inc_ie.ie_dependfaddr, 4,
1023 	    hash);
1024 	last_word = inc->inc_fport;
1025 	hash = jenkins_hash32(&last_word, 1, hash);
1026 	ccb = &V_tcp_fastopen_ccache.base[hash & V_tcp_fastopen_ccache.mask];
1027 	*ccbp = ccb;
1028 	CCB_LOCK(ccb);
1029 
1030 	/*
1031 	 * Always returns with locked bucket.
1032 	 */
1033 	TAILQ_FOREACH(cce, &ccb->ccb_entries, cce_link)
1034 		if ((!(cce->af == AF_INET6) == !(inc->inc_flags & INC_ISIPV6)) &&
1035 		    (cce->server_port == inc->inc_ie.ie_fport) &&
1036 		    (((cce->af == AF_INET) &&
1037 		      (cce->cce_client_ip.v4.s_addr == inc->inc_laddr.s_addr) &&
1038 		      (cce->cce_server_ip.v4.s_addr == inc->inc_faddr.s_addr)) ||
1039 		     ((cce->af == AF_INET6) &&
1040 		      IN6_ARE_ADDR_EQUAL(&cce->cce_client_ip.v6, &inc->inc6_laddr) &&
1041 		      IN6_ARE_ADDR_EQUAL(&cce->cce_server_ip.v6, &inc->inc6_faddr))))
1042 			break;
1043 
1044 	return (cce);
1045 }
1046 
1047 static struct tcp_fastopen_ccache_entry *
1048 tcp_fastopen_ccache_create(struct tcp_fastopen_ccache_bucket *ccb,
1049     struct in_conninfo *inc, uint16_t mss, uint8_t cookie_len, uint8_t *cookie)
1050 {
1051 	struct tcp_fastopen_ccache_entry *cce;
1052 
1053 	/*
1054 	 * 1. Create a new entry, or
1055 	 * 2. Reclaim an existing entry, or
1056 	 * 3. Fail
1057 	 */
1058 
1059 	CCB_LOCK_ASSERT(ccb);
1060 
1061 	cce = NULL;
1062 	if (ccb->ccb_num_entries < V_tcp_fastopen_ccache.bucket_limit)
1063 		cce = uma_zalloc(V_tcp_fastopen_ccache.zone, M_NOWAIT);
1064 
1065 	if (cce == NULL) {
1066 		/*
1067 		 * At bucket limit, or out of memory - reclaim last
1068 		 * entry in bucket.
1069 		 */
1070 		cce = TAILQ_LAST(&ccb->ccb_entries, bucket_entries);
1071 		if (cce == NULL) {
1072 			/* XXX count this event */
1073 			return (NULL);
1074 		}
1075 
1076 		TAILQ_REMOVE(&ccb->ccb_entries, cce, cce_link);
1077 	} else
1078 		ccb->ccb_num_entries++;
1079 
1080 	TAILQ_INSERT_HEAD(&ccb->ccb_entries, cce, cce_link);
1081 	cce->af = (inc->inc_flags & INC_ISIPV6) ? AF_INET6 : AF_INET;
1082 	if (cce->af == AF_INET) {
1083 		cce->cce_client_ip.v4 = inc->inc_laddr;
1084 		cce->cce_server_ip.v4 = inc->inc_faddr;
1085 	} else {
1086 		cce->cce_client_ip.v6 = inc->inc6_laddr;
1087 		cce->cce_server_ip.v6 = inc->inc6_faddr;
1088 	}
1089 	cce->server_port = inc->inc_fport;
1090 	if ((cookie_len >= TCP_FASTOPEN_MIN_COOKIE_LEN) &&
1091 	    (cookie_len <= TCP_FASTOPEN_MAX_COOKIE_LEN) &&
1092 	    ((cookie_len & 0x1) == 0)) {
1093 		cce->server_mss = mss;
1094 		cce->cookie_len = cookie_len;
1095 		memcpy(cce->cookie, cookie, cookie_len);
1096 		cce->disable_time = 0;
1097 	} else {
1098 		/* invalid cookie length, disable cce */
1099 		cce->server_mss = 0;
1100 		cce->cookie_len = 0;
1101 		cce->disable_time = getsbinuptime();
1102 	}
1103 
1104 	return (cce);
1105 }
1106 
1107 static void
1108 tcp_fastopen_ccache_bucket_trim(struct tcp_fastopen_ccache_bucket *ccb,
1109     unsigned int limit)
1110 {
1111 	struct tcp_fastopen_ccache_entry *cce, *cce_tmp;
1112 	unsigned int entries;
1113 
1114 	CCB_LOCK(ccb);
1115 	entries = 0;
1116 	TAILQ_FOREACH_SAFE(cce, &ccb->ccb_entries, cce_link, cce_tmp) {
1117 		entries++;
1118 		if (entries > limit)
1119 			tcp_fastopen_ccache_entry_drop(cce, ccb);
1120 	}
1121 	KASSERT(ccb->ccb_num_entries <= (int)limit,
1122 	    ("%s: ccb->ccb_num_entries %d exceeds limit %d", __func__,
1123 		ccb->ccb_num_entries, limit));
1124 	if (limit == 0) {
1125 		KASSERT(TAILQ_EMPTY(&ccb->ccb_entries),
1126 		    ("%s: ccb->ccb_entries not empty", __func__));
1127 		ccb->ccb_num_entries = -1; /* disable bucket */
1128 	}
1129 	CCB_UNLOCK(ccb);
1130 }
1131 
1132 static void
1133 tcp_fastopen_ccache_entry_drop(struct tcp_fastopen_ccache_entry *cce,
1134     struct tcp_fastopen_ccache_bucket *ccb)
1135 {
1136 
1137 	CCB_LOCK_ASSERT(ccb);
1138 
1139 	TAILQ_REMOVE(&ccb->ccb_entries, cce, cce_link);
1140 	ccb->ccb_num_entries--;
1141 	uma_zfree(V_tcp_fastopen_ccache.zone, cce);
1142 }
1143 
1144 static int
1145 sysctl_net_inet_tcp_fastopen_ccache_list(SYSCTL_HANDLER_ARGS)
1146 {
1147 	struct sbuf sb;
1148 	struct tcp_fastopen_ccache_bucket *ccb;
1149 	struct tcp_fastopen_ccache_entry *cce;
1150 	sbintime_t now, duration, limit;
1151 	const int linesize = 128;
1152 	int i, error, num_entries;
1153 	unsigned int j;
1154 #ifdef INET6
1155 	char clt_buf[INET6_ADDRSTRLEN], srv_buf[INET6_ADDRSTRLEN];
1156 #else
1157 	char clt_buf[INET_ADDRSTRLEN], srv_buf[INET_ADDRSTRLEN];
1158 #endif
1159 
1160 	if (jailed_without_vnet(curthread->td_ucred) != 0)
1161 		return (EPERM);
1162 
1163 	/* Only allow root to read the client cookie cache */
1164 	if (curthread->td_ucred->cr_uid != 0)
1165 		return (EPERM);
1166 
1167 	num_entries = 0;
1168 	for (i = 0; i < V_tcp_fastopen_ccache.buckets; i++) {
1169 		ccb = &V_tcp_fastopen_ccache.base[i];
1170 		CCB_LOCK(ccb);
1171 		if (ccb->ccb_num_entries > 0)
1172 			num_entries += ccb->ccb_num_entries;
1173 		CCB_UNLOCK(ccb);
1174 	}
1175 	sbuf_new(&sb, NULL, linesize * (num_entries + 1), SBUF_INCLUDENUL);
1176 
1177 	sbuf_printf(&sb,
1178 	            "\nLocal IP address     Remote IP address     Port   MSS"
1179 	            " Disabled Cookie\n");
1180 
1181 	now = getsbinuptime();
1182 	limit = (sbintime_t)V_tcp_fastopen_path_disable_time << 32;
1183 	for (i = 0; i < V_tcp_fastopen_ccache.buckets; i++) {
1184 		ccb = &V_tcp_fastopen_ccache.base[i];
1185 		CCB_LOCK(ccb);
1186 		TAILQ_FOREACH(cce, &ccb->ccb_entries, cce_link) {
1187 			if (cce->disable_time != 0) {
1188 				duration = now - cce->disable_time;
1189 				if (limit >= duration)
1190 					duration = limit - duration;
1191 				else
1192 					duration = 0;
1193 			} else
1194 				duration = 0;
1195 			sbuf_printf(&sb,
1196 			            "%-20s %-20s %5u %5u ",
1197 			            inet_ntop(cce->af, &cce->cce_client_ip,
1198 			                clt_buf, sizeof(clt_buf)),
1199 			            inet_ntop(cce->af, &cce->cce_server_ip,
1200 			                srv_buf, sizeof(srv_buf)),
1201 			            ntohs(cce->server_port),
1202 			            cce->server_mss);
1203 			if (duration > 0)
1204 				sbuf_printf(&sb, "%7ds ", sbintime_getsec(duration));
1205 			else
1206 				sbuf_printf(&sb, "%8s ", "No");
1207 			for (j = 0; j < cce->cookie_len; j++)
1208 				sbuf_printf(&sb, "%02x", cce->cookie[j]);
1209 			sbuf_putc(&sb, '\n');
1210 		}
1211 		CCB_UNLOCK(ccb);
1212 	}
1213 	error = sbuf_finish(&sb);
1214 	if (error == 0)
1215 		error = SYSCTL_OUT(req, sbuf_data(&sb), sbuf_len(&sb));
1216 	sbuf_delete(&sb);
1217 	return (error);
1218 }
1219