xref: /freebsd/sys/netinet/tcp_fastopen.c (revision 397e83df75e0fcd0d3fcb95ae4d794cb7600fc89)
1 /*-
2  * Copyright (c) 2015-2017 Patrick Kelsey
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 /*
28  * This is an implementation of TCP Fast Open (TFO) [RFC7413]. To include
29  * this code, add the following line to your kernel config:
30  *
31  * options TCP_RFC7413
32  *
33  *
34  * The generated TFO cookies are the 64-bit output of
35  * SipHash24(key=<16-byte-key>, msg=<client-ip>).  Multiple concurrent valid
36  * keys are supported so that time-based rolling cookie invalidation
37  * policies can be implemented in the system.  The default number of
38  * concurrent keys is 2.  This can be adjusted in the kernel config as
39  * follows:
40  *
41  * options TCP_RFC7413_MAX_KEYS=<num-keys>
42  *
43  *
44  * In addition to the facilities defined in RFC7413, this implementation
45  * supports a pre-shared key (PSK) mode of operation in which the TFO server
46  * requires the client to be in possession of a shared secret in order for
47  * the client to be able to successfully open TFO connections with the
48  * server.  This is useful, for example, in environments where TFO servers
49  * are exposed to both internal and external clients and only wish to allow
50  * TFO connections from internal clients.
51  *
52  * In the PSK mode of operation, the server generates and sends TFO cookies
53  * to requesting clients as usual.  However, when validating cookies
54  * received in TFO SYNs from clients, the server requires the
55  * client-supplied cookie to equal SipHash24(key=<16-byte-psk>,
56  * msg=<cookie-sent-to-client>).
57  *
58  * Multiple concurrent valid pre-shared keys are supported so that
59  * time-based rolling PSK invalidation policies can be implemented in the
60  * system.  The default number of concurrent pre-shared keys is 2.  This can
61  * be adjusted in the kernel config as follows:
62  *
63  * options TCP_RFC7413_MAX_PSKS=<num-psks>
64  *
65  *
66  * The following TFO-specific sysctls are defined:
67  *
68  * net.inet.tcp.fastopen.acceptany (RW, default 0)
69  *     When non-zero, all client-supplied TFO cookies will be considered to
70  *     be valid.
71  *
72  * net.inet.tcp.fastopen.autokey (RW, default 120)
73  *     When this and net.inet.tcp.fastopen.server_enable are non-zero, a new
74  *     key will be automatically generated after this many seconds.
75  *
76  * net.inet.tcp.fastopen.ccache_bucket_limit
77  *                     (RWTUN, default TCP_FASTOPEN_CCACHE_BUCKET_LIMIT_DEFAULT)
78  *     The maximum number of entries in a client cookie cache bucket.
79  *
80  * net.inet.tcp.fastopen.ccache_buckets
81  *                          (RDTUN, default TCP_FASTOPEN_CCACHE_BUCKETS_DEFAULT)
82  *     The number of client cookie cache buckets.
83  *
84  * net.inet.tcp.fastopen.ccache_list (RO)
85  *     Print the client cookie cache.
86  *
 * net.inet.tcp.fastopen.client_enable (RW, default 1)
88  *     When zero, no new active (i.e., client) TFO connections can be
89  *     created.  On the transition from enabled to disabled, the client
90  *     cookie cache is cleared and disabled.  The transition from enabled to
91  *     disabled does not affect any active TFO connections in progress; it
92  *     only prevents new ones from being made.
93  *
94  * net.inet.tcp.fastopen.keylen (RD)
95  *     The key length in bytes.
96  *
97  * net.inet.tcp.fastopen.maxkeys (RD)
98  *     The maximum number of keys supported.
99  *
100  * net.inet.tcp.fastopen.maxpsks (RD)
101  *     The maximum number of pre-shared keys supported.
102  *
103  * net.inet.tcp.fastopen.numkeys (RD)
104  *     The current number of keys installed.
105  *
106  * net.inet.tcp.fastopen.numpsks (RD)
107  *     The current number of pre-shared keys installed.
108  *
109  * net.inet.tcp.fastopen.path_disable_time
110  *                          (RW, default TCP_FASTOPEN_PATH_DISABLE_TIME_DEFAULT)
111  *     When a failure occurs while trying to create a new active (i.e.,
112  *     client) TFO connection, new active connections on the same path, as
113  *     determined by the tuple {client_ip, server_ip, server_port}, will be
114  *     forced to be non-TFO for this many seconds.  Note that the path
115  *     disable mechanism relies on state stored in client cookie cache
116  *     entries, so it is possible for the disable time for a given path to
117  *     be reduced if the corresponding client cookie cache entry is reused
118  *     due to resource pressure before the disable period has elapsed.
119  *
120  * net.inet.tcp.fastopen.psk_enable (RW, default 0)
121  *     When non-zero, pre-shared key (PSK) mode is enabled for all TFO
122  *     servers.  On the transition from enabled to disabled, all installed
123  *     pre-shared keys are removed.
124  *
125  * net.inet.tcp.fastopen.server_enable (RW, default 0)
126  *     When zero, no new passive (i.e., server) TFO connections can be
127  *     created.  On the transition from enabled to disabled, all installed
128  *     keys and pre-shared keys are removed.  On the transition from
129  *     disabled to enabled, if net.inet.tcp.fastopen.autokey is non-zero and
130  *     there are no keys installed, a new key will be generated immediately.
131  *     The transition from enabled to disabled does not affect any passive
132  *     TFO connections in progress; it only prevents new ones from being
133  *     made.
134  *
135  * net.inet.tcp.fastopen.setkey (WR)
136  *     Install a new key by writing net.inet.tcp.fastopen.keylen bytes to
137  *     this sysctl.
138  *
139  * net.inet.tcp.fastopen.setpsk (WR)
140  *     Install a new pre-shared key by writing net.inet.tcp.fastopen.keylen
141  *     bytes to this sysctl.
142  *
143  * In order for TFO connections to be created via a listen socket, that
144  * socket must have the TCP_FASTOPEN socket option set on it.  This option
145  * can be set on the socket either before or after the listen() is invoked.
146  * Clearing this option on a listen socket after it has been set has no
147  * effect on existing TFO connections or TFO connections in progress; it
148  * only prevents new TFO connections from being made.
149  *
150  * For passively-created sockets, the TCP_FASTOPEN socket option can be
151  * queried to determine whether the connection was established using TFO.
152  * Note that connections that are established via a TFO SYN, but that fall
153  * back to using a non-TFO SYN|ACK will have the TCP_FASTOPEN socket option
154  * set.
155  *
156  * Per the RFC, this implementation limits the number of TFO connections
157  * that can be in the SYN_RECEIVED state on a per listen-socket basis.
158  * Whenever this limit is exceeded, requests for new TFO connections are
159  * serviced as non-TFO requests.  Without such a limit, given a valid TFO
160  * cookie, an attacker could keep the listen queue in an overflow condition
161  * using a TFO SYN flood.  This implementation sets the limit at half the
162  * configured listen backlog.
163  *
164  */
165 
166 #include <sys/cdefs.h>
167 #include "opt_inet.h"
168 
169 #include <sys/param.h>
170 #include <sys/jail.h>
171 #include <sys/kernel.h>
172 #include <sys/hash.h>
173 #include <sys/limits.h>
174 #include <sys/lock.h>
175 #include <sys/proc.h>
176 #include <sys/rmlock.h>
177 #include <sys/sbuf.h>
178 #include <sys/socket.h>
179 #include <sys/socketvar.h>
180 #include <sys/sysctl.h>
181 #include <sys/systm.h>
182 
183 #include <crypto/siphash/siphash.h>
184 
185 #include <net/vnet.h>
186 
187 #include <netinet/in.h>
188 #include <netinet/in_pcb.h>
189 #include <netinet/tcp_var.h>
190 #include <netinet/tcp_fastopen.h>
191 
/* Keys and pre-shared keys are both SipHash keys, so they share one length. */
#define	TCP_FASTOPEN_KEY_LEN	SIPHASH_KEY_LENGTH

#if TCP_FASTOPEN_PSK_LEN != TCP_FASTOPEN_KEY_LEN
#error TCP_FASTOPEN_PSK_LEN must be equal to TCP_FASTOPEN_KEY_LEN
#endif

/*
 * The client cookie buffer must be able to hold a PSK, because a PSK-mode
 * setsockopt() uses tcpcb.t_tfo_cookie.client to hold the PSK until the
 * connect occurs.
 */
#if TCP_FASTOPEN_MAX_COOKIE_LEN < TCP_FASTOPEN_PSK_LEN
#error TCP_FASTOPEN_MAX_COOKIE_LEN must be >= TCP_FASTOPEN_PSK_LEN
#endif

/* Defaults for the client cookie cache geometry. */
#define TCP_FASTOPEN_CCACHE_BUCKET_LIMIT_DEFAULT	16
#define TCP_FASTOPEN_CCACHE_BUCKETS_DEFAULT		2048 /* must be power of 2 */

#define TCP_FASTOPEN_PATH_DISABLE_TIME_DEFAULT		900 /* seconds */

/*
 * Number of concurrently valid keys: taken from the TCP_RFC7413_MAX_KEYS
 * kernel option when it is defined and at least 1, otherwise 2.
 */
#if !defined(TCP_RFC7413_MAX_KEYS) || (TCP_RFC7413_MAX_KEYS < 1)
#define	TCP_FASTOPEN_MAX_KEYS	2
#else
#define	TCP_FASTOPEN_MAX_KEYS	TCP_RFC7413_MAX_KEYS
#endif

/* Clamp the configured number of keys to at most 10. */
#if TCP_FASTOPEN_MAX_KEYS > 10
#undef TCP_FASTOPEN_MAX_KEYS
#define	TCP_FASTOPEN_MAX_KEYS	10
#endif

/*
 * Number of concurrently valid pre-shared keys: taken from the
 * TCP_RFC7413_MAX_PSKS kernel option when it is defined and at least 1,
 * otherwise 2.
 */
#if !defined(TCP_RFC7413_MAX_PSKS) || (TCP_RFC7413_MAX_PSKS < 1)
#define	TCP_FASTOPEN_MAX_PSKS	2
#else
#define	TCP_FASTOPEN_MAX_PSKS	TCP_RFC7413_MAX_PSKS
#endif

/* Clamp the configured number of pre-shared keys to at most 10. */
#if TCP_FASTOPEN_MAX_PSKS > 10
#undef TCP_FASTOPEN_MAX_PSKS
#define	TCP_FASTOPEN_MAX_PSKS	10
#endif
232 
/*
 * Installed cookie keys and pre-shared keys.  Both arrays are used as
 * circular buffers: a new entry is written to the slot after 'newest'
 * (or 'newest_psk'), wrapping to slot 0, and lookups walk backwards from
 * the newest slot.
 */
struct tcp_fastopen_keylist {
	unsigned int newest;		/* slot of most recently added key */
	unsigned int newest_psk;	/* slot of most recently added PSK */
	uint8_t key[TCP_FASTOPEN_MAX_KEYS][TCP_FASTOPEN_KEY_LEN];
	uint8_t psk[TCP_FASTOPEN_MAX_PSKS][TCP_FASTOPEN_KEY_LEN];
};
239 
/*
 * Context for the automatic key generation callout: the callout itself
 * plus the vnet that the callout handler switches to when it fires.
 */
struct tcp_fastopen_callout {
	struct callout c;	/* autokey timer */
	struct vnet *v;		/* vnet recorded by tcp_fastopen_init() */
};
244 
/*
 * Forward declarations of the file-local client cookie cache helpers, so
 * that they can be called before their definitions.
 */
static struct tcp_fastopen_ccache_entry *tcp_fastopen_ccache_lookup(
    struct in_conninfo *, struct tcp_fastopen_ccache_bucket **);
static struct tcp_fastopen_ccache_entry *tcp_fastopen_ccache_create(
    struct tcp_fastopen_ccache_bucket *, struct in_conninfo *, uint16_t, uint8_t,
    uint8_t *);
static void tcp_fastopen_ccache_bucket_trim(struct tcp_fastopen_ccache_bucket *,
    unsigned int);
static void tcp_fastopen_ccache_entry_drop(struct tcp_fastopen_ccache_entry *,
    struct tcp_fastopen_ccache_bucket *);
254 
/* Root of the net.inet.tcp.fastopen sysctl subtree. */
SYSCTL_NODE(_net_inet_tcp, OID_AUTO, fastopen, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "TCP Fast Open");

/* acceptany: plain per-vnet integer, no handler. */
VNET_DEFINE_STATIC(int, tcp_fastopen_acceptany) = 0;
#define	V_tcp_fastopen_acceptany	VNET(tcp_fastopen_acceptany)
SYSCTL_INT(_net_inet_tcp_fastopen, OID_AUTO, acceptany,
    CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_fastopen_acceptany), 0,
    "Accept any non-empty cookie");

/* autokey: interval in seconds; writes go through a handler. */
VNET_DEFINE_STATIC(unsigned int, tcp_fastopen_autokey) = 120;
#define	V_tcp_fastopen_autokey	VNET(tcp_fastopen_autokey)
static int sysctl_net_inet_tcp_fastopen_autokey(SYSCTL_HANDLER_ARGS);
SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, autokey,
    CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    NULL, 0, &sysctl_net_inet_tcp_fastopen_autokey, "IU",
    "Number of seconds between auto-generation of a new key; zero disables");

/*
 * ccache_bucket_limit: declared CTLFLAG_NOFETCH; the tunable is fetched
 * explicitly in tcp_fastopen_init().
 */
static int sysctl_net_inet_tcp_fastopen_ccache_bucket_limit(SYSCTL_HANDLER_ARGS);
SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, ccache_bucket_limit,
    CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_NOFETCH | CTLFLAG_NEEDGIANT,
    NULL, 0, &sysctl_net_inet_tcp_fastopen_ccache_bucket_limit, "IU",
    "Max entries per bucket in client cookie cache");

/* ccache_buckets: read-only tunable, validated in tcp_fastopen_init(). */
VNET_DEFINE_STATIC(unsigned int, tcp_fastopen_ccache_buckets) =
    TCP_FASTOPEN_CCACHE_BUCKETS_DEFAULT;
#define	V_tcp_fastopen_ccache_buckets VNET(tcp_fastopen_ccache_buckets)
SYSCTL_UINT(_net_inet_tcp_fastopen, OID_AUTO, ccache_buckets,
    CTLFLAG_VNET | CTLFLAG_RDTUN, &VNET_NAME(tcp_fastopen_ccache_buckets), 0,
    "Client cookie cache number of buckets (power of 2)");

/* client_enable: non-static so other files can reference it; defaults to 1. */
VNET_DEFINE(unsigned int, tcp_fastopen_client_enable) = 1;
static int sysctl_net_inet_tcp_fastopen_client_enable(SYSCTL_HANDLER_ARGS);
SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, client_enable,
    CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
    NULL, 0, &sysctl_net_inet_tcp_fastopen_client_enable, "IU",
    "Enable/disable TCP Fast Open client functionality");

/* keylen, maxkeys, maxpsks: read-only compile-time constants. */
SYSCTL_INT(_net_inet_tcp_fastopen, OID_AUTO, keylen,
    CTLFLAG_RD, SYSCTL_NULL_INT_PTR, TCP_FASTOPEN_KEY_LEN,
    "Key length in bytes");

SYSCTL_INT(_net_inet_tcp_fastopen, OID_AUTO, maxkeys,
    CTLFLAG_RD, SYSCTL_NULL_INT_PTR, TCP_FASTOPEN_MAX_KEYS,
    "Maximum number of keys supported");

SYSCTL_INT(_net_inet_tcp_fastopen, OID_AUTO, maxpsks,
    CTLFLAG_RD, SYSCTL_NULL_INT_PTR, TCP_FASTOPEN_MAX_PSKS,
    "Maximum number of pre-shared keys supported");

/* numkeys, numpsks: read-only counts of currently installed entries. */
VNET_DEFINE_STATIC(unsigned int, tcp_fastopen_numkeys) = 0;
#define	V_tcp_fastopen_numkeys	VNET(tcp_fastopen_numkeys)
SYSCTL_UINT(_net_inet_tcp_fastopen, OID_AUTO, numkeys,
    CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(tcp_fastopen_numkeys), 0,
    "Number of keys installed");

VNET_DEFINE_STATIC(unsigned int, tcp_fastopen_numpsks) = 0;
#define	V_tcp_fastopen_numpsks	VNET(tcp_fastopen_numpsks)
SYSCTL_UINT(_net_inet_tcp_fastopen, OID_AUTO, numpsks,
    CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(tcp_fastopen_numpsks), 0,
    "Number of pre-shared keys installed");

/* path_disable_time: plain per-vnet unsigned integer, in seconds. */
VNET_DEFINE_STATIC(unsigned int, tcp_fastopen_path_disable_time) =
    TCP_FASTOPEN_PATH_DISABLE_TIME_DEFAULT;
#define	V_tcp_fastopen_path_disable_time VNET(tcp_fastopen_path_disable_time)
SYSCTL_UINT(_net_inet_tcp_fastopen, OID_AUTO, path_disable_time,
    CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_fastopen_path_disable_time), 0,
    "Seconds a TFO failure disables a {client_ip, server_ip, server_port} path");

/* psk_enable: writes go through a handler that also clears installed PSKs. */
VNET_DEFINE_STATIC(unsigned int, tcp_fastopen_psk_enable) = 0;
#define	V_tcp_fastopen_psk_enable	VNET(tcp_fastopen_psk_enable)
static int sysctl_net_inet_tcp_fastopen_psk_enable(SYSCTL_HANDLER_ARGS);
SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, psk_enable,
    CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    NULL, 0, &sysctl_net_inet_tcp_fastopen_psk_enable, "IU",
    "Enable/disable TCP Fast Open server pre-shared key mode");

/* server_enable: non-static so other files can reference it; defaults to 0. */
VNET_DEFINE(unsigned int, tcp_fastopen_server_enable) = 0;
static int sysctl_net_inet_tcp_fastopen_server_enable(SYSCTL_HANDLER_ARGS);
SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, server_enable,
    CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    NULL, 0, &sysctl_net_inet_tcp_fastopen_server_enable, "IU",
    "Enable/disable TCP Fast Open server functionality");

/* setkey, setpsk: write-only opaque OIDs used to install key material. */
static int sysctl_net_inet_tcp_fastopen_setkey(SYSCTL_HANDLER_ARGS);
SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, setkey,
    CTLFLAG_VNET | CTLTYPE_OPAQUE | CTLFLAG_WR | CTLFLAG_MPSAFE,
    NULL, 0, &sysctl_net_inet_tcp_fastopen_setkey, "",
    "Install a new key");

static int sysctl_net_inet_tcp_fastopen_setpsk(SYSCTL_HANDLER_ARGS);
SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, setpsk,
    CTLFLAG_VNET | CTLTYPE_OPAQUE | CTLFLAG_WR | CTLFLAG_MPSAFE,
    NULL, 0, &sysctl_net_inet_tcp_fastopen_setpsk, "",
    "Install a new pre-shared key");

/* ccache_list: read-only string OID, flagged CTLFLAG_SKIP. */
static int sysctl_net_inet_tcp_fastopen_ccache_list(SYSCTL_HANDLER_ARGS);
SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, ccache_list,
    CTLFLAG_VNET | CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_SKIP | CTLFLAG_MPSAFE,
    NULL, 0, sysctl_net_inet_tcp_fastopen_ccache_list, "A",
    "List of all client cookie cache entries");
355 
/*
 * Per-vnet read-mostly lock.  In this file it is held around accesses to
 * the key list, the key/PSK counts and the enable flags that the sysctl
 * handlers change.
 */
VNET_DEFINE_STATIC(struct rmlock, tcp_fastopen_keylock);
#define	V_tcp_fastopen_keylock	VNET(tcp_fastopen_keylock)

/* Readers pass a struct rm_priotracker; writers take no argument. */
#define TCP_FASTOPEN_KEYS_RLOCK(t)	rm_rlock(&V_tcp_fastopen_keylock, (t))
#define TCP_FASTOPEN_KEYS_RUNLOCK(t)	rm_runlock(&V_tcp_fastopen_keylock, (t))
#define TCP_FASTOPEN_KEYS_WLOCK()	rm_wlock(&V_tcp_fastopen_keylock)
#define TCP_FASTOPEN_KEYS_WUNLOCK()	rm_wunlock(&V_tcp_fastopen_keylock)

/* Per-vnet list of installed keys and pre-shared keys. */
VNET_DEFINE_STATIC(struct tcp_fastopen_keylist, tcp_fastopen_keys);
#define V_tcp_fastopen_keys	VNET(tcp_fastopen_keys)

/* Per-vnet context for the automatic key generation callout. */
VNET_DEFINE_STATIC(struct tcp_fastopen_callout, tcp_fastopen_autokey_ctx);
#define V_tcp_fastopen_autokey_ctx	VNET(tcp_fastopen_autokey_ctx)

/* Per-vnet UMA zone used by tcp_fastopen_alloc_counter(). */
VNET_DEFINE_STATIC(uma_zone_t, counter_zone);
#define	V_counter_zone			VNET(counter_zone)

/* malloc type for the client cookie cache bucket array. */
static MALLOC_DEFINE(M_TCP_FASTOPEN_CCACHE, "tfo_ccache", "TFO client cookie cache buckets");

/* Per-vnet client cookie cache. */
VNET_DEFINE_STATIC(struct tcp_fastopen_ccache, tcp_fastopen_ccache);
#define V_tcp_fastopen_ccache	VNET(tcp_fastopen_ccache)

/* Per-bucket mutex helpers for the client cookie cache. */
#define	CCB_LOCK(ccb)		mtx_lock(&(ccb)->ccb_mtx)
#define	CCB_UNLOCK(ccb)		mtx_unlock(&(ccb)->ccb_mtx)
#define	CCB_LOCK_ASSERT(ccb)	mtx_assert(&(ccb)->ccb_mtx, MA_OWNED)
381 
382 void
383 tcp_fastopen_init(void)
384 {
385 	unsigned int i;
386 
387 	V_counter_zone = uma_zcreate("tfo", sizeof(unsigned int),
388 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
389 	rm_init(&V_tcp_fastopen_keylock, "tfo_keylock");
390 	callout_init_rm(&V_tcp_fastopen_autokey_ctx.c,
391 	    &V_tcp_fastopen_keylock, 0);
392 	V_tcp_fastopen_autokey_ctx.v = curvnet;
393 	V_tcp_fastopen_keys.newest = TCP_FASTOPEN_MAX_KEYS - 1;
394 	V_tcp_fastopen_keys.newest_psk = TCP_FASTOPEN_MAX_PSKS - 1;
395 
396 	TUNABLE_INT_FETCH("net.inet.tcp.fastopen.ccache_bucket_limit",
397 	    &V_tcp_fastopen_ccache.bucket_limit);
398 	if (V_tcp_fastopen_ccache.bucket_limit == 0)
399 		V_tcp_fastopen_ccache.bucket_limit =
400 		    TCP_FASTOPEN_CCACHE_BUCKET_LIMIT_DEFAULT;
401 
402 	/* May already be non-zero if kernel tunable was set */
403 	if ((V_tcp_fastopen_ccache_buckets == 0) ||
404 	    !powerof2(V_tcp_fastopen_ccache_buckets))
405 		V_tcp_fastopen_ccache.buckets =
406 			TCP_FASTOPEN_CCACHE_BUCKETS_DEFAULT;
407 	else
408 		V_tcp_fastopen_ccache.buckets = V_tcp_fastopen_ccache_buckets;
409 
410 	V_tcp_fastopen_ccache.mask = V_tcp_fastopen_ccache.buckets - 1;
411 	V_tcp_fastopen_ccache.secret = arc4random();
412 
413 	V_tcp_fastopen_ccache.base = malloc(V_tcp_fastopen_ccache.buckets *
414 	    sizeof(struct tcp_fastopen_ccache_bucket), M_TCP_FASTOPEN_CCACHE,
415 	    M_WAITOK | M_ZERO);
416 
417 	for (i = 0; i < V_tcp_fastopen_ccache.buckets; i++) {
418 		TAILQ_INIT(&V_tcp_fastopen_ccache.base[i].ccb_entries);
419 		mtx_init(&V_tcp_fastopen_ccache.base[i].ccb_mtx, "tfo_ccache_bucket",
420 			 NULL, MTX_DEF);
421 		if (V_tcp_fastopen_client_enable) {
422 			/* enable bucket */
423 			V_tcp_fastopen_ccache.base[i].ccb_num_entries = 0;
424 		} else {
425 			/* disable bucket */
426 			V_tcp_fastopen_ccache.base[i].ccb_num_entries = -1;
427 		}
428 		V_tcp_fastopen_ccache.base[i].ccb_ccache = &V_tcp_fastopen_ccache;
429 	}
430 
431 	/*
432 	 * Note that while the total number of entries in the cookie cache
433 	 * is limited by the table management logic to
434 	 * V_tcp_fastopen_ccache.buckets *
435 	 * V_tcp_fastopen_ccache.bucket_limit, the total number of items in
436 	 * this zone can exceed that amount by the number of CPUs in the
437 	 * system times the maximum number of unallocated items that can be
438 	 * present in each UMA per-CPU cache for this zone.
439 	 */
440 	V_tcp_fastopen_ccache.zone = uma_zcreate("tfo_ccache_entries",
441 	    sizeof(struct tcp_fastopen_ccache_entry), NULL, NULL, NULL, NULL,
442 	    UMA_ALIGN_CACHE, 0);
443 }
444 
445 void
446 tcp_fastopen_destroy(void)
447 {
448 	struct tcp_fastopen_ccache_bucket *ccb;
449 	unsigned int i;
450 
451 	for (i = 0; i < V_tcp_fastopen_ccache.buckets; i++) {
452 		ccb = &V_tcp_fastopen_ccache.base[i];
453 		tcp_fastopen_ccache_bucket_trim(ccb, 0);
454 		mtx_destroy(&ccb->ccb_mtx);
455 	}
456 
457 	KASSERT(uma_zone_get_cur(V_tcp_fastopen_ccache.zone) == 0,
458 	    ("%s: TFO ccache zone allocation count not 0", __func__));
459 	uma_zdestroy(V_tcp_fastopen_ccache.zone);
460 	free(V_tcp_fastopen_ccache.base, M_TCP_FASTOPEN_CCACHE);
461 
462 	callout_drain(&V_tcp_fastopen_autokey_ctx.c);
463 	rm_destroy(&V_tcp_fastopen_keylock);
464 	uma_zdestroy(V_counter_zone);
465 }
466 
467 unsigned int *
468 tcp_fastopen_alloc_counter(void)
469 {
470 	unsigned int *counter;
471 	counter = uma_zalloc(V_counter_zone, M_NOWAIT);
472 	if (counter)
473 		*counter = 1;
474 	return (counter);
475 }
476 
477 void
478 tcp_fastopen_decrement_counter(unsigned int *counter)
479 {
480 	if (*counter == 1)
481 		uma_zfree(V_counter_zone, counter);
482 	else
483 		atomic_subtract_int(counter, 1);
484 }
485 
486 static void
487 tcp_fastopen_addkey_locked(uint8_t *key)
488 {
489 
490 	V_tcp_fastopen_keys.newest++;
491 	if (V_tcp_fastopen_keys.newest == TCP_FASTOPEN_MAX_KEYS)
492 		V_tcp_fastopen_keys.newest = 0;
493 	memcpy(V_tcp_fastopen_keys.key[V_tcp_fastopen_keys.newest], key,
494 	    TCP_FASTOPEN_KEY_LEN);
495 	if (V_tcp_fastopen_numkeys < TCP_FASTOPEN_MAX_KEYS)
496 		V_tcp_fastopen_numkeys++;
497 }
498 
499 static void
500 tcp_fastopen_addpsk_locked(uint8_t *psk)
501 {
502 
503 	V_tcp_fastopen_keys.newest_psk++;
504 	if (V_tcp_fastopen_keys.newest_psk == TCP_FASTOPEN_MAX_PSKS)
505 		V_tcp_fastopen_keys.newest_psk = 0;
506 	memcpy(V_tcp_fastopen_keys.psk[V_tcp_fastopen_keys.newest_psk], psk,
507 	    TCP_FASTOPEN_KEY_LEN);
508 	if (V_tcp_fastopen_numpsks < TCP_FASTOPEN_MAX_PSKS)
509 		V_tcp_fastopen_numpsks++;
510 }
511 
512 static void
513 tcp_fastopen_autokey_locked(void)
514 {
515 	uint8_t newkey[TCP_FASTOPEN_KEY_LEN];
516 
517 	arc4rand(newkey, TCP_FASTOPEN_KEY_LEN, 0);
518 	tcp_fastopen_addkey_locked(newkey);
519 }
520 
521 static void
522 tcp_fastopen_autokey_callout(void *arg)
523 {
524 	struct tcp_fastopen_callout *ctx = arg;
525 
526 	CURVNET_SET(ctx->v);
527 	tcp_fastopen_autokey_locked();
528 	callout_reset(&ctx->c, V_tcp_fastopen_autokey * hz,
529 		      tcp_fastopen_autokey_callout, ctx);
530 	CURVNET_RESTORE();
531 }
532 
533 static uint64_t
534 tcp_fastopen_make_cookie(uint8_t key[SIPHASH_KEY_LENGTH], struct in_conninfo *inc)
535 {
536 	SIPHASH_CTX ctx;
537 	uint64_t siphash;
538 
539 	SipHash24_Init(&ctx);
540 	SipHash_SetKey(&ctx, key);
541 	switch (inc->inc_flags & INC_ISIPV6) {
542 #ifdef INET
543 	case 0:
544 		SipHash_Update(&ctx, &inc->inc_faddr, sizeof(inc->inc_faddr));
545 		break;
546 #endif
547 #ifdef INET6
548 	case INC_ISIPV6:
549 		SipHash_Update(&ctx, &inc->inc6_faddr, sizeof(inc->inc6_faddr));
550 		break;
551 #endif
552 	}
553 	SipHash_Final((u_int8_t *)&siphash, &ctx);
554 
555 	return (siphash);
556 }
557 
558 static uint64_t
559 tcp_fastopen_make_psk_cookie(uint8_t *psk, uint8_t *cookie, uint8_t cookie_len)
560 {
561 	SIPHASH_CTX ctx;
562 	uint64_t psk_cookie;
563 
564 	SipHash24_Init(&ctx);
565 	SipHash_SetKey(&ctx, psk);
566 	SipHash_Update(&ctx, cookie, cookie_len);
567 	SipHash_Final((u_int8_t *)&psk_cookie, &ctx);
568 
569 	return (psk_cookie);
570 }
571 
572 static int
573 tcp_fastopen_find_cookie_match_locked(uint8_t *wire_cookie, uint64_t *cur_cookie)
574 {
575 	unsigned int i, psk_index;
576 	uint64_t psk_cookie;
577 
578 	if (V_tcp_fastopen_psk_enable) {
579 		psk_index = V_tcp_fastopen_keys.newest_psk;
580 		for (i = 0; i < V_tcp_fastopen_numpsks; i++) {
581 			psk_cookie =
582 			    tcp_fastopen_make_psk_cookie(
583 				 V_tcp_fastopen_keys.psk[psk_index],
584 				 (uint8_t *)cur_cookie,
585 				 TCP_FASTOPEN_COOKIE_LEN);
586 
587 			if (memcmp(wire_cookie, &psk_cookie,
588 				   TCP_FASTOPEN_COOKIE_LEN) == 0)
589 				return (1);
590 
591 			if (psk_index == 0)
592 				psk_index = TCP_FASTOPEN_MAX_PSKS - 1;
593 			else
594 				psk_index--;
595 		}
596 	} else if (memcmp(wire_cookie, cur_cookie, TCP_FASTOPEN_COOKIE_LEN) == 0)
597 		return (1);
598 
599 	return (0);
600 }
601 
602 /*
603  * Return values:
604  *	-1	the cookie is invalid and no valid cookie is available
605  *	 0	the cookie is invalid and the latest cookie has been returned
606  *	 1	the cookie is valid and the latest cookie has been returned
607  */
608 int
609 tcp_fastopen_check_cookie(struct in_conninfo *inc, uint8_t *cookie,
610     unsigned int len, uint64_t *latest_cookie)
611 {
612 	struct rm_priotracker tracker;
613 	unsigned int i, key_index;
614 	int rv;
615 	uint64_t cur_cookie;
616 
617 	if (V_tcp_fastopen_acceptany) {
618 		*latest_cookie = 0;
619 		return (1);
620 	}
621 
622 	TCP_FASTOPEN_KEYS_RLOCK(&tracker);
623 	if (len != TCP_FASTOPEN_COOKIE_LEN) {
624 		if (V_tcp_fastopen_numkeys > 0) {
625 			*latest_cookie =
626 			    tcp_fastopen_make_cookie(
627 				V_tcp_fastopen_keys.key[V_tcp_fastopen_keys.newest],
628 				inc);
629 			rv = 0;
630 		} else
631 			rv = -1;
632 		goto out;
633 	}
634 
635 	/*
636 	 * Check against each available key, from newest to oldest.
637 	 */
638 	key_index = V_tcp_fastopen_keys.newest;
639 	for (i = 0; i < V_tcp_fastopen_numkeys; i++) {
640 		cur_cookie =
641 		    tcp_fastopen_make_cookie(V_tcp_fastopen_keys.key[key_index],
642 			inc);
643 		if (i == 0)
644 			*latest_cookie = cur_cookie;
645 		rv = tcp_fastopen_find_cookie_match_locked(cookie, &cur_cookie);
646 		if (rv)
647 			goto out;
648 		if (key_index == 0)
649 			key_index = TCP_FASTOPEN_MAX_KEYS - 1;
650 		else
651 			key_index--;
652 	}
653 	rv = 0;
654 
655  out:
656 	TCP_FASTOPEN_KEYS_RUNLOCK(&tracker);
657 	return (rv);
658 }
659 
660 static int
661 sysctl_net_inet_tcp_fastopen_autokey(SYSCTL_HANDLER_ARGS)
662 {
663 	int error;
664 	unsigned int new;
665 
666 	new = V_tcp_fastopen_autokey;
667 	error = sysctl_handle_int(oidp, &new, 0, req);
668 	if (error == 0 && req->newptr) {
669 		if (new > (INT_MAX / hz))
670 			return (EINVAL);
671 
672 		TCP_FASTOPEN_KEYS_WLOCK();
673 		if (V_tcp_fastopen_server_enable) {
674 			if (V_tcp_fastopen_autokey && !new)
675 				callout_stop(&V_tcp_fastopen_autokey_ctx.c);
676 			else if (new)
677 				callout_reset(&V_tcp_fastopen_autokey_ctx.c,
678 				    new * hz, tcp_fastopen_autokey_callout,
679 				    &V_tcp_fastopen_autokey_ctx);
680 		}
681 		V_tcp_fastopen_autokey = new;
682 		TCP_FASTOPEN_KEYS_WUNLOCK();
683 	}
684 
685 	return (error);
686 }
687 
688 static int
689 sysctl_net_inet_tcp_fastopen_psk_enable(SYSCTL_HANDLER_ARGS)
690 {
691 	int error;
692 	unsigned int new;
693 
694 	new = V_tcp_fastopen_psk_enable;
695 	error = sysctl_handle_int(oidp, &new, 0, req);
696 	if (error == 0 && req->newptr) {
697 		if (V_tcp_fastopen_psk_enable && !new) {
698 			/* enabled -> disabled */
699 			TCP_FASTOPEN_KEYS_WLOCK();
700 			V_tcp_fastopen_numpsks = 0;
701 			V_tcp_fastopen_keys.newest_psk =
702 			    TCP_FASTOPEN_MAX_PSKS - 1;
703 			V_tcp_fastopen_psk_enable = 0;
704 			TCP_FASTOPEN_KEYS_WUNLOCK();
705 		} else if (!V_tcp_fastopen_psk_enable && new) {
706 			/* disabled -> enabled */
707 			TCP_FASTOPEN_KEYS_WLOCK();
708 			V_tcp_fastopen_psk_enable = 1;
709 			TCP_FASTOPEN_KEYS_WUNLOCK();
710 		}
711 	}
712 	return (error);
713 }
714 
715 static int
716 sysctl_net_inet_tcp_fastopen_server_enable(SYSCTL_HANDLER_ARGS)
717 {
718 	int error;
719 	unsigned int new;
720 
721 	new = V_tcp_fastopen_server_enable;
722 	error = sysctl_handle_int(oidp, &new, 0, req);
723 	if (error == 0 && req->newptr) {
724 		if (V_tcp_fastopen_server_enable && !new) {
725 			/* enabled -> disabled */
726 			TCP_FASTOPEN_KEYS_WLOCK();
727 			V_tcp_fastopen_numkeys = 0;
728 			V_tcp_fastopen_keys.newest = TCP_FASTOPEN_MAX_KEYS - 1;
729 			if (V_tcp_fastopen_autokey)
730 				callout_stop(&V_tcp_fastopen_autokey_ctx.c);
731 			V_tcp_fastopen_numpsks = 0;
732 			V_tcp_fastopen_keys.newest_psk =
733 			    TCP_FASTOPEN_MAX_PSKS - 1;
734 			V_tcp_fastopen_server_enable = 0;
735 			TCP_FASTOPEN_KEYS_WUNLOCK();
736 		} else if (!V_tcp_fastopen_server_enable && new) {
737 			/* disabled -> enabled */
738 			TCP_FASTOPEN_KEYS_WLOCK();
739 			if (V_tcp_fastopen_autokey &&
740 			    (V_tcp_fastopen_numkeys == 0)) {
741 				tcp_fastopen_autokey_locked();
742 				callout_reset(&V_tcp_fastopen_autokey_ctx.c,
743 				    V_tcp_fastopen_autokey * hz,
744 				    tcp_fastopen_autokey_callout,
745 				    &V_tcp_fastopen_autokey_ctx);
746 			}
747 			V_tcp_fastopen_server_enable = 1;
748 			TCP_FASTOPEN_KEYS_WUNLOCK();
749 		}
750 	}
751 	return (error);
752 }
753 
754 static int
755 sysctl_net_inet_tcp_fastopen_setkey(SYSCTL_HANDLER_ARGS)
756 {
757 	int error;
758 	uint8_t newkey[TCP_FASTOPEN_KEY_LEN];
759 
760 	if (req->oldptr != NULL || req->oldlen != 0)
761 		return (EINVAL);
762 	if (req->newptr == NULL)
763 		return (EPERM);
764 	if (req->newlen != sizeof(newkey))
765 		return (EINVAL);
766 	error = SYSCTL_IN(req, newkey, sizeof(newkey));
767 	if (error)
768 		return (error);
769 
770 	TCP_FASTOPEN_KEYS_WLOCK();
771 	tcp_fastopen_addkey_locked(newkey);
772 	TCP_FASTOPEN_KEYS_WUNLOCK();
773 
774 	return (0);
775 }
776 
777 static int
778 sysctl_net_inet_tcp_fastopen_setpsk(SYSCTL_HANDLER_ARGS)
779 {
780 	int error;
781 	uint8_t newpsk[TCP_FASTOPEN_KEY_LEN];
782 
783 	if (req->oldptr != NULL || req->oldlen != 0)
784 		return (EINVAL);
785 	if (req->newptr == NULL)
786 		return (EPERM);
787 	if (req->newlen != sizeof(newpsk))
788 		return (EINVAL);
789 	error = SYSCTL_IN(req, newpsk, sizeof(newpsk));
790 	if (error)
791 		return (error);
792 
793 	TCP_FASTOPEN_KEYS_WLOCK();
794 	tcp_fastopen_addpsk_locked(newpsk);
795 	TCP_FASTOPEN_KEYS_WUNLOCK();
796 
797 	return (0);
798 }
799 
800 static int
801 sysctl_net_inet_tcp_fastopen_ccache_bucket_limit(SYSCTL_HANDLER_ARGS)
802 {
803 	struct tcp_fastopen_ccache_bucket *ccb;
804 	int error;
805 	unsigned int new;
806 	unsigned int i;
807 
808 	new = V_tcp_fastopen_ccache.bucket_limit;
809 	error = sysctl_handle_int(oidp, &new, 0, req);
810 	if (error == 0 && req->newptr) {
811 		if ((new == 0) || (new > INT_MAX))
812 			error = EINVAL;
813 		else {
814 			if (new < V_tcp_fastopen_ccache.bucket_limit) {
815 				for (i = 0; i < V_tcp_fastopen_ccache.buckets;
816 				     i++) {
817 					ccb = &V_tcp_fastopen_ccache.base[i];
818 					tcp_fastopen_ccache_bucket_trim(ccb, new);
819 				}
820 			}
821 			V_tcp_fastopen_ccache.bucket_limit = new;
822 		}
823 	}
824 	return (error);
825 }
826 
827 static int
828 sysctl_net_inet_tcp_fastopen_client_enable(SYSCTL_HANDLER_ARGS)
829 {
830 	struct tcp_fastopen_ccache_bucket *ccb;
831 	int error;
832 	unsigned int new, i;
833 
834 	new = V_tcp_fastopen_client_enable;
835 	error = sysctl_handle_int(oidp, &new, 0, req);
836 	if (error == 0 && req->newptr) {
837 		if (V_tcp_fastopen_client_enable && !new) {
838 			/* enabled -> disabled */
839 			for (i = 0; i < V_tcp_fastopen_ccache.buckets; i++) {
840 				ccb = &V_tcp_fastopen_ccache.base[i];
841 				KASSERT(ccb->ccb_num_entries > -1,
842 				    ("%s: ccb->ccb_num_entries %d is negative",
843 					__func__, ccb->ccb_num_entries));
844 				tcp_fastopen_ccache_bucket_trim(ccb, 0);
845 			}
846 			V_tcp_fastopen_client_enable = 0;
847 		} else if (!V_tcp_fastopen_client_enable && new) {
848 			/* disabled -> enabled */
849 			for (i = 0; i < V_tcp_fastopen_ccache.buckets; i++) {
850 				ccb = &V_tcp_fastopen_ccache.base[i];
851 				CCB_LOCK(ccb);
852 				KASSERT(TAILQ_EMPTY(&ccb->ccb_entries),
853 				    ("%s: ccb->ccb_entries not empty", __func__));
854 				KASSERT(ccb->ccb_num_entries == -1,
855 				    ("%s: ccb->ccb_num_entries %d not -1", __func__,
856 					ccb->ccb_num_entries));
857 				ccb->ccb_num_entries = 0; /* enable bucket */
858 				CCB_UNLOCK(ccb);
859 			}
860 			V_tcp_fastopen_client_enable = 1;
861 		}
862 	}
863 	return (error);
864 }
865 
866 void
867 tcp_fastopen_connect(struct tcpcb *tp)
868 {
869 	struct inpcb *inp = tptoinpcb(tp);
870 	struct tcp_fastopen_ccache_bucket *ccb;
871 	struct tcp_fastopen_ccache_entry *cce;
872 	sbintime_t now;
873 	uint16_t server_mss;
874 	uint64_t psk_cookie;
875 
876 	psk_cookie = 0;
877 	cce = tcp_fastopen_ccache_lookup(&inp->inp_inc, &ccb);
878 	if (cce) {
879 		if (cce->disable_time == 0) {
880 			if ((cce->cookie_len > 0) &&
881 			    (tp->t_tfo_client_cookie_len ==
882 			     TCP_FASTOPEN_PSK_LEN)) {
883 				psk_cookie =
884 				    tcp_fastopen_make_psk_cookie(
885 					tp->t_tfo_cookie.client,
886 					cce->cookie, cce->cookie_len);
887 			} else {
888 				tp->t_tfo_client_cookie_len = cce->cookie_len;
889 				memcpy(tp->t_tfo_cookie.client, cce->cookie,
890 				    cce->cookie_len);
891 			}
892 			server_mss = cce->server_mss;
893 			CCB_UNLOCK(ccb);
894 			if (tp->t_tfo_client_cookie_len ==
895 			    TCP_FASTOPEN_PSK_LEN && psk_cookie) {
896 				tp->t_tfo_client_cookie_len =
897 				    TCP_FASTOPEN_COOKIE_LEN;
898 				memcpy(tp->t_tfo_cookie.client, &psk_cookie,
899 				    TCP_FASTOPEN_COOKIE_LEN);
900 			}
901 			tcp_mss(tp, server_mss ? server_mss : -1);
902 			tp->snd_wnd = tp->t_maxseg;
903 		} else {
904 			/*
905 			 * The path is disabled.  Check the time and
906 			 * possibly re-enable.
907 			 */
908 			now = getsbinuptime();
909 			if (now - cce->disable_time >
910 			    ((sbintime_t)V_tcp_fastopen_path_disable_time << 32)) {
911 				/*
912 				 * Re-enable path.  Force a TFO cookie
913 				 * request.  Forget the old MSS as it may be
914 				 * bogus now, and we will rediscover it in
915 				 * the SYN|ACK.
916 				 */
917 				cce->disable_time = 0;
918 				cce->server_mss = 0;
919 				cce->cookie_len = 0;
920 				/*
921 				 * tp->t_tfo... cookie details are already
922 				 * zero from the tcpcb init.
923 				 */
924 			} else {
925 				/*
926 				 * Path is disabled, so disable TFO on this
927 				 * connection.
928 				 */
929 				tp->t_flags &= ~TF_FASTOPEN;
930 			}
931 			CCB_UNLOCK(ccb);
932 			tcp_mss(tp, -1);
933 			/*
934 			 * snd_wnd is irrelevant since we are either forcing
935 			 * a TFO cookie request or disabling TFO - either
936 			 * way, no data with the SYN.
937 			 */
938 		}
939 	} else {
940 		/*
941 		 * A new entry for this path will be created when a SYN|ACK
942 		 * comes back, or the attempt otherwise fails.
943 		 */
944 		CCB_UNLOCK(ccb);
945 		tcp_mss(tp, -1);
946 		/*
947 		 * snd_wnd is irrelevant since we are forcing a TFO cookie
948 		 * request.
949 		 */
950 	}
951 }
952 
953 void
954 tcp_fastopen_disable_path(struct tcpcb *tp)
955 {
956 	struct in_conninfo *inc = &tptoinpcb(tp)->inp_inc;
957 	struct tcp_fastopen_ccache_bucket *ccb;
958 	struct tcp_fastopen_ccache_entry *cce;
959 
960 	cce = tcp_fastopen_ccache_lookup(inc, &ccb);
961 	if (cce) {
962 		cce->server_mss = 0;
963 		cce->cookie_len = 0;
964 		/*
965 		 * Preserve the existing disable time if it is already
966 		 * disabled.
967 		 */
968 		if (cce->disable_time == 0)
969 			cce->disable_time = getsbinuptime();
970 	} else /* use invalid cookie len to create disabled entry */
971 		tcp_fastopen_ccache_create(ccb, inc, 0,
972 	   	    TCP_FASTOPEN_MAX_COOKIE_LEN + 1, NULL);
973 
974 	CCB_UNLOCK(ccb);
975 	tp->t_flags &= ~TF_FASTOPEN;
976 }
977 
978 void
979 tcp_fastopen_update_cache(struct tcpcb *tp, uint16_t mss,
980     uint8_t cookie_len, uint8_t *cookie)
981 {
982 	struct in_conninfo *inc = &tptoinpcb(tp)->inp_inc;
983 	struct tcp_fastopen_ccache_bucket *ccb;
984 	struct tcp_fastopen_ccache_entry *cce;
985 
986 	cce = tcp_fastopen_ccache_lookup(inc, &ccb);
987 	if (cce) {
988 		if ((cookie_len >= TCP_FASTOPEN_MIN_COOKIE_LEN) &&
989 		    (cookie_len <= TCP_FASTOPEN_MAX_COOKIE_LEN) &&
990 		    ((cookie_len & 0x1) == 0)) {
991 			cce->server_mss = mss;
992 			cce->cookie_len = cookie_len;
993 			memcpy(cce->cookie, cookie, cookie_len);
994 			cce->disable_time = 0;
995 		} else {
996 			/* invalid cookie length, disable entry */
997 			cce->server_mss = 0;
998 			cce->cookie_len = 0;
999 			/*
1000 			 * Preserve the existing disable time if it is
1001 			 * already disabled.
1002 			 */
1003 			if (cce->disable_time == 0)
1004 				cce->disable_time = getsbinuptime();
1005 		}
1006 	} else
1007 		tcp_fastopen_ccache_create(ccb, inc, mss, cookie_len, cookie);
1008 
1009 	CCB_UNLOCK(ccb);
1010 }
1011 
1012 static struct tcp_fastopen_ccache_entry *
1013 tcp_fastopen_ccache_lookup(struct in_conninfo *inc,
1014     struct tcp_fastopen_ccache_bucket **ccbp)
1015 {
1016 	struct tcp_fastopen_ccache_bucket *ccb;
1017 	struct tcp_fastopen_ccache_entry *cce;
1018 	uint32_t last_word;
1019 	uint32_t hash;
1020 
1021 	hash = jenkins_hash32((uint32_t *)&inc->inc_ie.ie_dependladdr, 4,
1022 	    V_tcp_fastopen_ccache.secret);
1023 	hash = jenkins_hash32((uint32_t *)&inc->inc_ie.ie_dependfaddr, 4,
1024 	    hash);
1025 	last_word = inc->inc_fport;
1026 	hash = jenkins_hash32(&last_word, 1, hash);
1027 	ccb = &V_tcp_fastopen_ccache.base[hash & V_tcp_fastopen_ccache.mask];
1028 	*ccbp = ccb;
1029 	CCB_LOCK(ccb);
1030 
1031 	/*
1032 	 * Always returns with locked bucket.
1033 	 */
1034 	TAILQ_FOREACH(cce, &ccb->ccb_entries, cce_link)
1035 		if ((!(cce->af == AF_INET6) == !(inc->inc_flags & INC_ISIPV6)) &&
1036 		    (cce->server_port == inc->inc_ie.ie_fport) &&
1037 		    (((cce->af == AF_INET) &&
1038 		      (cce->cce_client_ip.v4.s_addr == inc->inc_laddr.s_addr) &&
1039 		      (cce->cce_server_ip.v4.s_addr == inc->inc_faddr.s_addr)) ||
1040 		     ((cce->af == AF_INET6) &&
1041 		      IN6_ARE_ADDR_EQUAL(&cce->cce_client_ip.v6, &inc->inc6_laddr) &&
1042 		      IN6_ARE_ADDR_EQUAL(&cce->cce_server_ip.v6, &inc->inc6_faddr))))
1043 			break;
1044 
1045 	return (cce);
1046 }
1047 
1048 static struct tcp_fastopen_ccache_entry *
1049 tcp_fastopen_ccache_create(struct tcp_fastopen_ccache_bucket *ccb,
1050     struct in_conninfo *inc, uint16_t mss, uint8_t cookie_len, uint8_t *cookie)
1051 {
1052 	struct tcp_fastopen_ccache_entry *cce;
1053 
1054 	/*
1055 	 * 1. Create a new entry, or
1056 	 * 2. Reclaim an existing entry, or
1057 	 * 3. Fail
1058 	 */
1059 
1060 	CCB_LOCK_ASSERT(ccb);
1061 
1062 	cce = NULL;
1063 	if (ccb->ccb_num_entries < V_tcp_fastopen_ccache.bucket_limit)
1064 		cce = uma_zalloc(V_tcp_fastopen_ccache.zone, M_NOWAIT);
1065 
1066 	if (cce == NULL) {
1067 		/*
1068 		 * At bucket limit, or out of memory - reclaim last
1069 		 * entry in bucket.
1070 		 */
1071 		cce = TAILQ_LAST(&ccb->ccb_entries, bucket_entries);
1072 		if (cce == NULL) {
1073 			/* XXX count this event */
1074 			return (NULL);
1075 		}
1076 
1077 		TAILQ_REMOVE(&ccb->ccb_entries, cce, cce_link);
1078 	} else
1079 		ccb->ccb_num_entries++;
1080 
1081 	TAILQ_INSERT_HEAD(&ccb->ccb_entries, cce, cce_link);
1082 	cce->af = (inc->inc_flags & INC_ISIPV6) ? AF_INET6 : AF_INET;
1083 	if (cce->af == AF_INET) {
1084 		cce->cce_client_ip.v4 = inc->inc_laddr;
1085 		cce->cce_server_ip.v4 = inc->inc_faddr;
1086 	} else {
1087 		cce->cce_client_ip.v6 = inc->inc6_laddr;
1088 		cce->cce_server_ip.v6 = inc->inc6_faddr;
1089 	}
1090 	cce->server_port = inc->inc_fport;
1091 	if ((cookie_len >= TCP_FASTOPEN_MIN_COOKIE_LEN) &&
1092 	    (cookie_len <= TCP_FASTOPEN_MAX_COOKIE_LEN) &&
1093 	    ((cookie_len & 0x1) == 0)) {
1094 		cce->server_mss = mss;
1095 		cce->cookie_len = cookie_len;
1096 		memcpy(cce->cookie, cookie, cookie_len);
1097 		cce->disable_time = 0;
1098 	} else {
1099 		/* invalid cookie length, disable cce */
1100 		cce->server_mss = 0;
1101 		cce->cookie_len = 0;
1102 		cce->disable_time = getsbinuptime();
1103 	}
1104 
1105 	return (cce);
1106 }
1107 
1108 static void
1109 tcp_fastopen_ccache_bucket_trim(struct tcp_fastopen_ccache_bucket *ccb,
1110     unsigned int limit)
1111 {
1112 	struct tcp_fastopen_ccache_entry *cce, *cce_tmp;
1113 	unsigned int entries;
1114 
1115 	CCB_LOCK(ccb);
1116 	entries = 0;
1117 	TAILQ_FOREACH_SAFE(cce, &ccb->ccb_entries, cce_link, cce_tmp) {
1118 		entries++;
1119 		if (entries > limit)
1120 			tcp_fastopen_ccache_entry_drop(cce, ccb);
1121 	}
1122 	KASSERT(ccb->ccb_num_entries <= (int)limit,
1123 	    ("%s: ccb->ccb_num_entries %d exceeds limit %d", __func__,
1124 		ccb->ccb_num_entries, limit));
1125 	if (limit == 0) {
1126 		KASSERT(TAILQ_EMPTY(&ccb->ccb_entries),
1127 		    ("%s: ccb->ccb_entries not empty", __func__));
1128 		ccb->ccb_num_entries = -1; /* disable bucket */
1129 	}
1130 	CCB_UNLOCK(ccb);
1131 }
1132 
1133 static void
1134 tcp_fastopen_ccache_entry_drop(struct tcp_fastopen_ccache_entry *cce,
1135     struct tcp_fastopen_ccache_bucket *ccb)
1136 {
1137 
1138 	CCB_LOCK_ASSERT(ccb);
1139 
1140 	TAILQ_REMOVE(&ccb->ccb_entries, cce, cce_link);
1141 	ccb->ccb_num_entries--;
1142 	uma_zfree(V_tcp_fastopen_ccache.zone, cce);
1143 }
1144 
1145 static int
1146 sysctl_net_inet_tcp_fastopen_ccache_list(SYSCTL_HANDLER_ARGS)
1147 {
1148 	struct sbuf sb;
1149 	struct tcp_fastopen_ccache_bucket *ccb;
1150 	struct tcp_fastopen_ccache_entry *cce;
1151 	sbintime_t now, duration, limit;
1152 	const int linesize = 128;
1153 	int i, error, num_entries;
1154 	unsigned int j;
1155 #ifdef INET6
1156 	char clt_buf[INET6_ADDRSTRLEN], srv_buf[INET6_ADDRSTRLEN];
1157 #else
1158 	char clt_buf[INET_ADDRSTRLEN], srv_buf[INET_ADDRSTRLEN];
1159 #endif
1160 
1161 	if (jailed_without_vnet(curthread->td_ucred) != 0)
1162 		return (EPERM);
1163 
1164 	/* Only allow root to read the client cookie cache */
1165 	if (curthread->td_ucred->cr_uid != 0)
1166 		return (EPERM);
1167 
1168 	num_entries = 0;
1169 	for (i = 0; i < V_tcp_fastopen_ccache.buckets; i++) {
1170 		ccb = &V_tcp_fastopen_ccache.base[i];
1171 		CCB_LOCK(ccb);
1172 		if (ccb->ccb_num_entries > 0)
1173 			num_entries += ccb->ccb_num_entries;
1174 		CCB_UNLOCK(ccb);
1175 	}
1176 	sbuf_new(&sb, NULL, linesize * (num_entries + 1), SBUF_INCLUDENUL);
1177 
1178 	sbuf_printf(&sb,
1179 	            "\nLocal IP address     Remote IP address     Port   MSS"
1180 	            " Disabled Cookie\n");
1181 
1182 	now = getsbinuptime();
1183 	limit = (sbintime_t)V_tcp_fastopen_path_disable_time << 32;
1184 	for (i = 0; i < V_tcp_fastopen_ccache.buckets; i++) {
1185 		ccb = &V_tcp_fastopen_ccache.base[i];
1186 		CCB_LOCK(ccb);
1187 		TAILQ_FOREACH(cce, &ccb->ccb_entries, cce_link) {
1188 			if (cce->disable_time != 0) {
1189 				duration = now - cce->disable_time;
1190 				if (limit >= duration)
1191 					duration = limit - duration;
1192 				else
1193 					duration = 0;
1194 			} else
1195 				duration = 0;
1196 			sbuf_printf(&sb,
1197 			            "%-20s %-20s %5u %5u ",
1198 			            inet_ntop(cce->af, &cce->cce_client_ip,
1199 			                clt_buf, sizeof(clt_buf)),
1200 			            inet_ntop(cce->af, &cce->cce_server_ip,
1201 			                srv_buf, sizeof(srv_buf)),
1202 			            ntohs(cce->server_port),
1203 			            cce->server_mss);
1204 			if (duration > 0)
1205 				sbuf_printf(&sb, "%7ds ", sbintime_getsec(duration));
1206 			else
1207 				sbuf_printf(&sb, "%8s ", "No");
1208 			for (j = 0; j < cce->cookie_len; j++)
1209 				sbuf_printf(&sb, "%02x", cce->cookie[j]);
1210 			sbuf_putc(&sb, '\n');
1211 		}
1212 		CCB_UNLOCK(ccb);
1213 	}
1214 	error = sbuf_finish(&sb);
1215 	if (error == 0)
1216 		error = SYSCTL_OUT(req, sbuf_data(&sb), sbuf_len(&sb));
1217 	sbuf_delete(&sb);
1218 	return (error);
1219 }
1220