xref: /freebsd/sys/dev/wg/if_wg.c (revision 058ac3e8063366dafa634d9107642e12b038bf09)
1 /* SPDX-License-Identifier: ISC
2  *
3  * Copyright (C) 2015-2021 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
4  * Copyright (C) 2019-2021 Matt Dunwoodie <ncon@noconroy.net>
5  * Copyright (c) 2019-2020 Rubicon Communications, LLC (Netgate)
6  * Copyright (c) 2021 Kyle Evans <kevans@FreeBSD.org>
7  * Copyright (c) 2022 The FreeBSD Foundation
8  */
9 
10 #include "opt_inet.h"
11 #include "opt_inet6.h"
12 
13 #include <sys/param.h>
14 #include <sys/systm.h>
15 #include <sys/counter.h>
16 #include <sys/gtaskqueue.h>
17 #include <sys/jail.h>
18 #include <sys/kernel.h>
19 #include <sys/lock.h>
20 #include <sys/mbuf.h>
21 #include <sys/module.h>
22 #include <sys/nv.h>
23 #include <sys/priv.h>
24 #include <sys/protosw.h>
25 #include <sys/rmlock.h>
26 #include <sys/rwlock.h>
27 #include <sys/smp.h>
28 #include <sys/socket.h>
29 #include <sys/socketvar.h>
30 #include <sys/sockio.h>
31 #include <sys/sysctl.h>
32 #include <sys/sx.h>
33 #include <machine/_inttypes.h>
34 #include <net/bpf.h>
35 #include <net/ethernet.h>
36 #include <net/if.h>
37 #include <net/if_clone.h>
38 #include <net/if_types.h>
39 #include <net/if_var.h>
40 #include <net/netisr.h>
41 #include <net/radix.h>
42 #include <netinet/in.h>
43 #include <netinet6/in6_var.h>
44 #include <netinet/ip.h>
45 #include <netinet/ip6.h>
46 #include <netinet/ip_icmp.h>
47 #include <netinet/icmp6.h>
48 #include <netinet/udp_var.h>
49 #include <netinet6/nd6.h>
50 
51 #include "wg_noise.h"
52 #include "wg_cookie.h"
53 #include "version.h"
54 #include "if_wg.h"
55 
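/*
 * The 80 bytes reserved below cover worst-case tunnel overhead: an outer
 * IPv6 header (40) + UDP (8) + struct wg_pkt_data (16) + NOISE_AUTHTAG_LEN (16).
 */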
56 #define DEFAULT_MTU		(ETHERMTU - 80)
57 #define MAX_MTU			(IF_MAXMTU - 80)
58 
59 #define MAX_STAGED_PKT		128
60 #define MAX_QUEUED_PKT		1024
61 #define MAX_QUEUED_PKT_MASK	(MAX_QUEUED_PKT - 1)
62 
63 #define MAX_QUEUED_HANDSHAKES	4096
64 
65 #define REKEY_TIMEOUT_JITTER	334 /* 1/3 sec, rounded up for arc4random_uniform() */
66 #define MAX_TIMER_HANDSHAKES	(90 / REKEY_TIMEOUT)
67 #define NEW_HANDSHAKE_TIMEOUT	(REKEY_TIMEOUT + KEEPALIVE_TIMEOUT)
68 #define UNDERLOAD_TIMEOUT	1
69 
70 #define DPRINTF(sc, ...) if (sc->sc_ifp->if_flags & IFF_DEBUG) if_printf(sc->sc_ifp, ##__VA_ARGS__)
71 
72 /* First byte indicating packet type on the wire */
73 #define WG_PKT_INITIATION htole32(1)
74 #define WG_PKT_RESPONSE htole32(2)
75 #define WG_PKT_COOKIE htole32(3)
76 #define WG_PKT_DATA htole32(4)
77 
78 #define WG_PKT_PADDING		16
79 #define WG_KEY_SIZE		32
80 
81 struct wg_pkt_initiation {
82 	uint32_t		t;
83 	uint32_t		s_idx;
84 	uint8_t			ue[NOISE_PUBLIC_KEY_LEN];
85 	uint8_t			es[NOISE_PUBLIC_KEY_LEN + NOISE_AUTHTAG_LEN];
86 	uint8_t			ets[NOISE_TIMESTAMP_LEN + NOISE_AUTHTAG_LEN];
87 	struct cookie_macs	m;
88 };
89 
90 struct wg_pkt_response {
91 	uint32_t		t;
92 	uint32_t		s_idx;
93 	uint32_t		r_idx;
94 	uint8_t			ue[NOISE_PUBLIC_KEY_LEN];
95 	uint8_t			en[0 + NOISE_AUTHTAG_LEN];
96 	struct cookie_macs	m;
97 };
98 
99 struct wg_pkt_cookie {
100 	uint32_t		t;
101 	uint32_t		r_idx;
102 	uint8_t			nonce[COOKIE_NONCE_SIZE];
103 	uint8_t			ec[COOKIE_ENCRYPTED_SIZE];
104 };
105 
106 struct wg_pkt_data {
107 	uint32_t		t;
108 	uint32_t		r_idx;
109 	uint64_t		nonce;
110 	uint8_t			buf[];
111 };
112 
113 struct wg_endpoint {
114 	union {
115 		struct sockaddr		r_sa;
116 		struct sockaddr_in	r_sin;
117 #ifdef INET6
118 		struct sockaddr_in6	r_sin6;
119 #endif
120 	} e_remote;
121 	union {
122 		struct in_addr		l_in;
123 #ifdef INET6
124 		struct in6_pktinfo	l_pktinfo6;
125 #define l_in6 l_pktinfo6.ipi6_addr
126 #endif
127 	} e_local;
128 };
129 
130 struct aip_addr {
131 	uint8_t		length;
132 	union {
133 		uint8_t		bytes[16];
134 		uint32_t	ip;
135 		uint32_t	ip6[4];
136 		struct in_addr	in;
137 		struct in6_addr	in6;
138 	};
139 };
140 
141 struct wg_aip {
142 	struct radix_node	 a_nodes[2];
143 	LIST_ENTRY(wg_aip)	 a_entry;
144 	struct aip_addr		 a_addr;
145 	struct aip_addr		 a_mask;
146 	struct wg_peer		*a_peer;
147 	sa_family_t		 a_af;
148 };
149 
150 struct wg_packet {
151 	STAILQ_ENTRY(wg_packet)	 p_serial;
152 	STAILQ_ENTRY(wg_packet)	 p_parallel;
153 	struct wg_endpoint	 p_endpoint;
154 	struct noise_keypair	*p_keypair;
155 	uint64_t		 p_nonce;
156 	struct mbuf		*p_mbuf;
157 	int			 p_mtu;
158 	sa_family_t		 p_af;
159 	enum wg_ring_state {
160 		WG_PACKET_UNCRYPTED,
161 		WG_PACKET_CRYPTED,
162 		WG_PACKET_DEAD,
163 	}			 p_state;
164 };
165 
166 STAILQ_HEAD(wg_packet_list, wg_packet);
167 
168 struct wg_queue {
169 	struct mtx		 q_mtx;
170 	struct wg_packet_list	 q_queue;
171 	size_t			 q_len;
172 };
173 
174 struct wg_peer {
175 	TAILQ_ENTRY(wg_peer)		 p_entry;
176 	uint64_t			 p_id;
177 	struct wg_softc			*p_sc;
178 
179 	struct noise_remote		*p_remote;
180 	struct cookie_maker		 p_cookie;
181 
182 	struct rwlock			 p_endpoint_lock;
183 	struct wg_endpoint		 p_endpoint;
184 
185 	struct wg_queue	 		 p_stage_queue;
186 	struct wg_queue	 		 p_encrypt_serial;
187 	struct wg_queue	 		 p_decrypt_serial;
188 
189 	bool				 p_enabled;
190 	bool				 p_need_another_keepalive;
191 	uint16_t			 p_persistent_keepalive_interval;
192 	struct callout			 p_new_handshake;
193 	struct callout			 p_send_keepalive;
194 	struct callout			 p_retry_handshake;
195 	struct callout			 p_zero_key_material;
196 	struct callout			 p_persistent_keepalive;
197 
198 	struct mtx			 p_handshake_mtx;
199 	struct timespec			 p_handshake_complete;	/* nanotime */
200 	int				 p_handshake_retries;
201 
202 	struct grouptask		 p_send;
203 	struct grouptask		 p_recv;
204 
205 	counter_u64_t			 p_tx_bytes;
206 	counter_u64_t			 p_rx_bytes;
207 
208 	LIST_HEAD(, wg_aip)		 p_aips;
209 	size_t				 p_aips_num;
210 };
211 
212 struct wg_socket {
213 	struct socket	*so_so4;
214 	struct socket	*so_so6;
215 	uint32_t	 so_user_cookie;
216 	int		 so_fibnum;
217 	in_port_t	 so_port;
218 };
219 
220 struct wg_softc {
221 	LIST_ENTRY(wg_softc)	 sc_entry;
222 	struct ifnet		*sc_ifp;
223 	int			 sc_flags;
224 
225 	struct ucred		*sc_ucred;
226 	struct wg_socket	 sc_socket;
227 
228 	TAILQ_HEAD(,wg_peer)	 sc_peers;
229 	size_t			 sc_peers_num;
230 
231 	struct noise_local	*sc_local;
232 	struct cookie_checker	 sc_cookie;
233 
234 	struct radix_node_head	*sc_aip4;
235 	struct radix_node_head	*sc_aip6;
236 
237 	struct grouptask	 sc_handshake;
238 	struct wg_queue		 sc_handshake_queue;
239 
240 	struct grouptask	*sc_encrypt;
241 	struct grouptask	*sc_decrypt;
242 	struct wg_queue		 sc_encrypt_parallel;
243 	struct wg_queue		 sc_decrypt_parallel;
244 	u_int			 sc_encrypt_last_cpu;
245 	u_int			 sc_decrypt_last_cpu;
246 
247 	struct sx		 sc_lock;
248 };
249 
250 #define	WGF_DYING	0x0001
251 
252 #define MAX_LOOPS	8
253 #define MTAG_WGLOOP	0x77676c70 /* wglp */
254 #ifndef ENOKEY
255 #define	ENOKEY	ENOTCAPABLE
256 #endif
257 
258 #define	GROUPTASK_DRAIN(gtask)			\
259 	gtaskqueue_drain((gtask)->gt_taskqueue, &(gtask)->gt_task)
260 
261 #define BPF_MTAP2_AF(ifp, m, af) do { \
262 		uint32_t __bpf_tap_af = (af); \
263 		BPF_MTAP2(ifp, &__bpf_tap_af, sizeof(__bpf_tap_af), m); \
264 	} while (0)
265 
266 static int clone_count;
267 static uma_zone_t wg_packet_zone;
268 static volatile unsigned long peer_counter = 0;
269 static const char wgname[] = "wg";
270 static unsigned wg_osd_jail_slot;
271 
272 static struct sx wg_sx;
273 SX_SYSINIT(wg_sx, &wg_sx, "wg_sx");
274 
275 static LIST_HEAD(, wg_softc) wg_list = LIST_HEAD_INITIALIZER(wg_list);
276 
277 static TASKQGROUP_DEFINE(wg_tqg, mp_ncpus, 1);
278 
279 MALLOC_DEFINE(M_WG, "WG", "wireguard");
280 
281 VNET_DEFINE_STATIC(struct if_clone *, wg_cloner);
282 
283 #define	V_wg_cloner	VNET(wg_cloner)
284 #define	WG_CAPS		IFCAP_LINKSTATE
285 
286 struct wg_timespec64 {
287 	uint64_t	tv_sec;
288 	uint64_t	tv_nsec;
289 };
290 
291 static int wg_socket_init(struct wg_softc *, in_port_t);
292 static int wg_socket_bind(struct socket **, struct socket **, in_port_t *);
293 static void wg_socket_set(struct wg_softc *, struct socket *, struct socket *);
294 static void wg_socket_uninit(struct wg_softc *);
295 static int wg_socket_set_sockopt(struct socket *, struct socket *, int, void *, size_t);
296 static int wg_socket_set_cookie(struct wg_softc *, uint32_t);
297 static int wg_socket_set_fibnum(struct wg_softc *, int);
298 static int wg_send(struct wg_softc *, struct wg_endpoint *, struct mbuf *);
299 static void wg_timers_enable(struct wg_peer *);
300 static void wg_timers_disable(struct wg_peer *);
301 static void wg_timers_set_persistent_keepalive(struct wg_peer *, uint16_t);
302 static void wg_timers_get_last_handshake(struct wg_peer *, struct wg_timespec64 *);
303 static void wg_timers_event_data_sent(struct wg_peer *);
304 static void wg_timers_event_data_received(struct wg_peer *);
305 static void wg_timers_event_any_authenticated_packet_sent(struct wg_peer *);
306 static void wg_timers_event_any_authenticated_packet_received(struct wg_peer *);
307 static void wg_timers_event_any_authenticated_packet_traversal(struct wg_peer *);
308 static void wg_timers_event_handshake_initiated(struct wg_peer *);
309 static void wg_timers_event_handshake_complete(struct wg_peer *);
310 static void wg_timers_event_session_derived(struct wg_peer *);
311 static void wg_timers_event_want_initiation(struct wg_peer *);
312 static void wg_timers_run_send_initiation(struct wg_peer *, bool);
313 static void wg_timers_run_retry_handshake(void *);
314 static void wg_timers_run_send_keepalive(void *);
315 static void wg_timers_run_new_handshake(void *);
316 static void wg_timers_run_zero_key_material(void *);
317 static void wg_timers_run_persistent_keepalive(void *);
318 static int wg_aip_add(struct wg_softc *, struct wg_peer *, sa_family_t, const void *, uint8_t);
319 static struct wg_peer *wg_aip_lookup(struct wg_softc *, sa_family_t, void *);
320 static void wg_aip_remove_all(struct wg_softc *, struct wg_peer *);
321 static struct wg_peer *wg_peer_alloc(struct wg_softc *, const uint8_t [WG_KEY_SIZE]);
322 static void wg_peer_free_deferred(struct noise_remote *);
323 static void wg_peer_destroy(struct wg_peer *);
324 static void wg_peer_destroy_all(struct wg_softc *);
325 static void wg_peer_send_buf(struct wg_peer *, uint8_t *, size_t);
326 static void wg_send_initiation(struct wg_peer *);
327 static void wg_send_response(struct wg_peer *);
328 static void wg_send_cookie(struct wg_softc *, struct cookie_macs *, uint32_t, struct wg_endpoint *);
329 static void wg_peer_set_endpoint(struct wg_peer *, struct wg_endpoint *);
330 static void wg_peer_clear_src(struct wg_peer *);
331 static void wg_peer_get_endpoint(struct wg_peer *, struct wg_endpoint *);
332 static void wg_send_buf(struct wg_softc *, struct wg_endpoint *, uint8_t *, size_t);
333 static void wg_send_keepalive(struct wg_peer *);
334 static void wg_handshake(struct wg_softc *, struct wg_packet *);
335 static void wg_encrypt(struct wg_softc *, struct wg_packet *);
336 static void wg_decrypt(struct wg_softc *, struct wg_packet *);
337 static void wg_softc_handshake_receive(struct wg_softc *);
338 static void wg_softc_decrypt(struct wg_softc *);
339 static void wg_softc_encrypt(struct wg_softc *);
340 static void wg_encrypt_dispatch(struct wg_softc *);
341 static void wg_decrypt_dispatch(struct wg_softc *);
342 static void wg_deliver_out(struct wg_peer *);
343 static void wg_deliver_in(struct wg_peer *);
344 static struct wg_packet *wg_packet_alloc(struct mbuf *);
345 static void wg_packet_free(struct wg_packet *);
346 static void wg_queue_init(struct wg_queue *, const char *);
347 static void wg_queue_deinit(struct wg_queue *);
348 static size_t wg_queue_len(struct wg_queue *);
349 static int wg_queue_enqueue_handshake(struct wg_queue *, struct wg_packet *);
350 static struct wg_packet *wg_queue_dequeue_handshake(struct wg_queue *);
351 static void wg_queue_push_staged(struct wg_queue *, struct wg_packet *);
352 static void wg_queue_enlist_staged(struct wg_queue *, struct wg_packet_list *);
353 static void wg_queue_delist_staged(struct wg_queue *, struct wg_packet_list *);
354 static void wg_queue_purge(struct wg_queue *);
355 static int wg_queue_both(struct wg_queue *, struct wg_queue *, struct wg_packet *);
356 static struct wg_packet *wg_queue_dequeue_serial(struct wg_queue *);
357 static struct wg_packet *wg_queue_dequeue_parallel(struct wg_queue *);
358 static bool wg_input(struct mbuf *, int, struct inpcb *, const struct sockaddr *, void *);
359 static void wg_peer_send_staged(struct wg_peer *);
360 static int wg_clone_create(struct if_clone *, int, caddr_t);
361 static void wg_qflush(struct ifnet *);
362 static inline int determine_af_and_pullup(struct mbuf **m, sa_family_t *af);
363 static int wg_xmit(struct ifnet *, struct mbuf *, sa_family_t, uint32_t);
364 static int wg_transmit(struct ifnet *, struct mbuf *);
365 static int wg_output(struct ifnet *, struct mbuf *, const struct sockaddr *, struct route *);
366 static void wg_clone_destroy(struct ifnet *);
367 static bool wgc_privileged(struct wg_softc *);
368 static int wgc_get(struct wg_softc *, struct wg_data_io *);
369 static int wgc_set(struct wg_softc *, struct wg_data_io *);
370 static int wg_up(struct wg_softc *);
371 static void wg_down(struct wg_softc *);
372 static void wg_reassign(struct ifnet *, struct vnet *, char *unused);
373 static void wg_init(void *);
374 static int wg_ioctl(struct ifnet *, u_long, caddr_t);
375 static void vnet_wg_init(const void *);
376 static void vnet_wg_uninit(const void *);
377 static int wg_module_init(void);
378 static void wg_module_deinit(void);
379 
380 /* TODO Peer */
381 static struct wg_peer *
382 wg_peer_alloc(struct wg_softc *sc, const uint8_t pub_key[WG_KEY_SIZE])
383 {
384 	struct wg_peer *peer;
385 
386 	sx_assert(&sc->sc_lock, SX_XLOCKED);
387 
388 	peer = malloc(sizeof(*peer), M_WG, M_WAITOK | M_ZERO);
389 	peer->p_remote = noise_remote_alloc(sc->sc_local, peer, pub_key);
390 	peer->p_tx_bytes = counter_u64_alloc(M_WAITOK);
391 	peer->p_rx_bytes = counter_u64_alloc(M_WAITOK);
392 	peer->p_id = peer_counter++;
393 	peer->p_sc = sc;
394 
395 	cookie_maker_init(&peer->p_cookie, pub_key);
396 
397 	rw_init(&peer->p_endpoint_lock, "wg_peer_endpoint");
398 
399 	wg_queue_init(&peer->p_stage_queue, "stageq");
400 	wg_queue_init(&peer->p_encrypt_serial, "txq");
401 	wg_queue_init(&peer->p_decrypt_serial, "rxq");
402 
403 	peer->p_enabled = false;
404 	peer->p_need_another_keepalive = false;
405 	peer->p_persistent_keepalive_interval = 0;
406 	callout_init(&peer->p_new_handshake, true);
407 	callout_init(&peer->p_send_keepalive, true);
408 	callout_init(&peer->p_retry_handshake, true);
409 	callout_init(&peer->p_persistent_keepalive, true);
410 	callout_init(&peer->p_zero_key_material, true);
411 
412 	mtx_init(&peer->p_handshake_mtx, "peer handshake", NULL, MTX_DEF);
413 	bzero(&peer->p_handshake_complete, sizeof(peer->p_handshake_complete));
414 	peer->p_handshake_retries = 0;
415 
416 	GROUPTASK_INIT(&peer->p_send, 0, (gtask_fn_t *)wg_deliver_out, peer);
417 	taskqgroup_attach(qgroup_wg_tqg, &peer->p_send, peer, NULL, NULL, "wg send");
418 	GROUPTASK_INIT(&peer->p_recv, 0, (gtask_fn_t *)wg_deliver_in, peer);
419 	taskqgroup_attach(qgroup_wg_tqg, &peer->p_recv, peer, NULL, NULL, "wg recv");
420 
421 	LIST_INIT(&peer->p_aips);
422 	peer->p_aips_num = 0;
423 
424 	return (peer);
425 }
426 
427 static void
428 wg_peer_free_deferred(struct noise_remote *r)
429 {
430 	struct wg_peer *peer = noise_remote_arg(r);
431 
432 	/* While there are no references remaining, we may still have
433 	 * p_{send,recv} executing (think empty queue, but wg_deliver_{in,out}
434 	 * needs to check the queue). We should wait for them and then free. */
435 	GROUPTASK_DRAIN(&peer->p_recv);
436 	GROUPTASK_DRAIN(&peer->p_send);
437 	taskqgroup_detach(qgroup_wg_tqg, &peer->p_recv);
438 	taskqgroup_detach(qgroup_wg_tqg, &peer->p_send);
439 
440 	wg_queue_deinit(&peer->p_decrypt_serial);
441 	wg_queue_deinit(&peer->p_encrypt_serial);
442 	wg_queue_deinit(&peer->p_stage_queue);
443 
444 	counter_u64_free(peer->p_tx_bytes);
445 	counter_u64_free(peer->p_rx_bytes);
446 	rw_destroy(&peer->p_endpoint_lock);
447 	mtx_destroy(&peer->p_handshake_mtx);
448 
449 	cookie_maker_free(&peer->p_cookie);
450 
451 	free(peer, M_WG);
452 }
453 
454 static void
455 wg_peer_destroy(struct wg_peer *peer)
456 {
457 	struct wg_softc *sc = peer->p_sc;
458 	sx_assert(&sc->sc_lock, SX_XLOCKED);
459 
460 	/* Disable remote and timers. This will prevent any new handshakes
461 	 * occurring. */
462 	noise_remote_disable(peer->p_remote);
463 	wg_timers_disable(peer);
464 
465 	/* Now we can remove all allowed IPs so no more packets will be routed
466 	 * to the peer. */
467 	wg_aip_remove_all(sc, peer);
468 
469 	/* Remove peer from the interface, then free. Some references may still
470 	 * exist to p_remote, so noise_remote_free will wait until they have
471 	 * all been put before calling wg_peer_free_deferred. */
472 	sc->sc_peers_num--;
473 	TAILQ_REMOVE(&sc->sc_peers, peer, p_entry);
474 	DPRINTF(sc, "Peer %" PRIu64 " destroyed\n", peer->p_id);
475 	noise_remote_free(peer->p_remote, wg_peer_free_deferred);
476 }
477 
478 static void
479 wg_peer_destroy_all(struct wg_softc *sc)
480 {
481 	struct wg_peer *peer, *tpeer;
482 	TAILQ_FOREACH_SAFE(peer, &sc->sc_peers, p_entry, tpeer)
483 		wg_peer_destroy(peer);
484 }
485 
486 static void
487 wg_peer_set_endpoint(struct wg_peer *peer, struct wg_endpoint *e)
488 {
489 	MPASS(e->e_remote.r_sa.sa_family != 0);
490 	if (memcmp(e, &peer->p_endpoint, sizeof(*e)) == 0)
491 		return;
492 
493 	rw_wlock(&peer->p_endpoint_lock);
494 	peer->p_endpoint = *e;
495 	rw_wunlock(&peer->p_endpoint_lock);
496 }
497 
498 static void
499 wg_peer_clear_src(struct wg_peer *peer)
500 {
501 	rw_wlock(&peer->p_endpoint_lock);
502 	bzero(&peer->p_endpoint.e_local, sizeof(peer->p_endpoint.e_local));
503 	rw_wunlock(&peer->p_endpoint_lock);
504 }
505 
506 static void
507 wg_peer_get_endpoint(struct wg_peer *peer, struct wg_endpoint *e)
508 {
509 	rw_rlock(&peer->p_endpoint_lock);
510 	*e = peer->p_endpoint;
511 	rw_runlock(&peer->p_endpoint_lock);
512 }
513 
514 /* Allowed IP */
515 static int
516 wg_aip_add(struct wg_softc *sc, struct wg_peer *peer, sa_family_t af, const void *addr, uint8_t cidr)
517 {
518 	struct radix_node_head	*root;
519 	struct radix_node	*node;
520 	struct wg_aip		*aip;
521 	int			 ret = 0;
522 
523 	aip = malloc(sizeof(*aip), M_WG, M_WAITOK | M_ZERO);
524 	aip->a_peer = peer;
525 	aip->a_af = af;
526 
527 	switch (af) {
528 #ifdef INET
529 	case AF_INET:
530 		if (cidr > 32) cidr = 32;
531 		root = sc->sc_aip4;
532 		aip->a_addr.in = *(const struct in_addr *)addr;
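		/* Build a mask with the top `cidr` bits set (network order) and
		 * canonicalize the address to its network prefix. */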
533 		aip->a_mask.ip = htonl(~((1LL << (32 - cidr)) - 1) & 0xffffffff);
534 		aip->a_addr.ip &= aip->a_mask.ip;
535 		aip->a_addr.length = aip->a_mask.length = offsetof(struct aip_addr, in) + sizeof(struct in_addr);
536 		break;
537 #endif
538 #ifdef INET6
539 	case AF_INET6:
540 		if (cidr > 128) cidr = 128;
541 		root = sc->sc_aip6;
542 		aip->a_addr.in6 = *(const struct in6_addr *)addr;
543 		in6_prefixlen2mask(&aip->a_mask.in6, cidr);
544 		for (int i = 0; i < 4; i++)
545 			aip->a_addr.ip6[i] &= aip->a_mask.ip6[i];
546 		aip->a_addr.length = aip->a_mask.length = offsetof(struct aip_addr, in6) + sizeof(struct in6_addr);
547 		break;
548 #endif
549 	default:
550 		free(aip, M_WG);
551 		return (EAFNOSUPPORT);
552 	}
553 
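	/*
	 * rnh_addaddr() hands back our node on success and NULL when the
	 * prefix is already present (or on failure); for an existing prefix,
	 * look it up and move the entry over to this peer.
	 */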
554 	RADIX_NODE_HEAD_LOCK(root);
555 	node = root->rnh_addaddr(&aip->a_addr, &aip->a_mask, &root->rh, aip->a_nodes);
556 	if (node == aip->a_nodes) {
557 		LIST_INSERT_HEAD(&peer->p_aips, aip, a_entry);
558 		peer->p_aips_num++;
559 	} else if (!node)
560 		node = root->rnh_lookup(&aip->a_addr, &aip->a_mask, &root->rh);
561 	if (!node) {
562 		free(aip, M_WG);
563 		return (ENOMEM);
564 	} else if (node != aip->a_nodes) {
565 		free(aip, M_WG);
566 		aip = (struct wg_aip *)node;
567 		if (aip->a_peer != peer) {
568 			LIST_REMOVE(aip, a_entry);
569 			aip->a_peer->p_aips_num--;
570 			aip->a_peer = peer;
571 			LIST_INSERT_HEAD(&peer->p_aips, aip, a_entry);
572 			aip->a_peer->p_aips_num++;
573 		}
574 	}
575 	RADIX_NODE_HEAD_UNLOCK(root);
576 	return (ret);
577 }
578 
579 static struct wg_peer *
580 wg_aip_lookup(struct wg_softc *sc, sa_family_t af, void *a)
581 {
582 	struct radix_node_head	*root;
583 	struct radix_node	*node;
584 	struct wg_peer		*peer;
585 	struct aip_addr		 addr;
586 	RADIX_NODE_HEAD_RLOCK_TRACKER;
587 
588 	switch (af) {
589 	case AF_INET:
590 		root = sc->sc_aip4;
591 		memcpy(&addr.in, a, sizeof(addr.in));
592 		addr.length = offsetof(struct aip_addr, in) + sizeof(struct in_addr);
593 		break;
594 	case AF_INET6:
595 		root = sc->sc_aip6;
596 		memcpy(&addr.in6, a, sizeof(addr.in6));
597 		addr.length = offsetof(struct aip_addr, in6) + sizeof(struct in6_addr);
598 		break;
599 	default:
600 		return NULL;
601 	}
602 
603 	RADIX_NODE_HEAD_RLOCK(root);
604 	node = root->rnh_matchaddr(&addr, &root->rh);
605 	if (node != NULL) {
606 		peer = ((struct wg_aip *)node)->a_peer;
607 		noise_remote_ref(peer->p_remote);
608 	} else {
609 		peer = NULL;
610 	}
611 	RADIX_NODE_HEAD_RUNLOCK(root);
612 
613 	return (peer);
614 }
615 
616 static void
617 wg_aip_remove_all(struct wg_softc *sc, struct wg_peer *peer)
618 {
619 	struct wg_aip		*aip, *taip;
620 
621 	RADIX_NODE_HEAD_LOCK(sc->sc_aip4);
622 	LIST_FOREACH_SAFE(aip, &peer->p_aips, a_entry, taip) {
623 		if (aip->a_af == AF_INET) {
624 			if (sc->sc_aip4->rnh_deladdr(&aip->a_addr, &aip->a_mask, &sc->sc_aip4->rh) == NULL)
625 				panic("failed to delete aip %p", aip);
626 			LIST_REMOVE(aip, a_entry);
627 			peer->p_aips_num--;
628 			free(aip, M_WG);
629 		}
630 	}
631 	RADIX_NODE_HEAD_UNLOCK(sc->sc_aip4);
632 
633 	RADIX_NODE_HEAD_LOCK(sc->sc_aip6);
634 	LIST_FOREACH_SAFE(aip, &peer->p_aips, a_entry, taip) {
635 		if (aip->a_af == AF_INET6) {
636 			if (sc->sc_aip6->rnh_deladdr(&aip->a_addr, &aip->a_mask, &sc->sc_aip6->rh) == NULL)
637 				panic("failed to delete aip %p", aip);
638 			LIST_REMOVE(aip, a_entry);
639 			peer->p_aips_num--;
640 			free(aip, M_WG);
641 		}
642 	}
643 	RADIX_NODE_HEAD_UNLOCK(sc->sc_aip6);
644 
645 	if (!LIST_EMPTY(&peer->p_aips) || peer->p_aips_num != 0)
646 		panic("wg_aip_remove_all could not delete all %p", peer);
647 }
648 
649 static int
650 wg_socket_init(struct wg_softc *sc, in_port_t port)
651 {
652 	struct ucred *cred = sc->sc_ucred;
653 	struct socket *so4 = NULL, *so6 = NULL;
654 	int rc;
655 
656 	sx_assert(&sc->sc_lock, SX_XLOCKED);
657 
658 	if (!cred)
659 		return (EBUSY);
660 
661 	/*
662 	 * For socket creation, we use the creds of the thread that created the
663 	 * tunnel rather than the current thread to maintain the semantics that
664 	 * WireGuard has on Linux with network namespaces -- that the sockets
665 	 * are created in their home vnet so that they can be configured and
666 	 * functionally attached to a foreign vnet as the jail's only interface
667 	 * to the network.
668 	 */
669 #ifdef INET
670 	rc = socreate(AF_INET, &so4, SOCK_DGRAM, IPPROTO_UDP, cred, curthread);
671 	if (rc)
672 		goto out;
673 
674 	rc = udp_set_kernel_tunneling(so4, wg_input, NULL, sc);
675 	/*
676 	 * udp_set_kernel_tunneling can only fail if there is already a tunneling function set.
677 	 * This should never happen with a new socket.
678 	 */
679 	MPASS(rc == 0);
680 #endif
681 
682 #ifdef INET6
683 	rc = socreate(AF_INET6, &so6, SOCK_DGRAM, IPPROTO_UDP, cred, curthread);
684 	if (rc)
685 		goto out;
686 	rc = udp_set_kernel_tunneling(so6, wg_input, NULL, sc);
687 	MPASS(rc == 0);
688 #endif
689 
690 	if (sc->sc_socket.so_user_cookie) {
691 		rc = wg_socket_set_sockopt(so4, so6, SO_USER_COOKIE, &sc->sc_socket.so_user_cookie, sizeof(sc->sc_socket.so_user_cookie));
692 		if (rc)
693 			goto out;
694 	}
695 	rc = wg_socket_set_sockopt(so4, so6, SO_SETFIB, &sc->sc_socket.so_fibnum, sizeof(sc->sc_socket.so_fibnum));
696 	if (rc)
697 		goto out;
698 
699 	rc = wg_socket_bind(&so4, &so6, &port);
700 	if (!rc) {
701 		sc->sc_socket.so_port = port;
702 		wg_socket_set(sc, so4, so6);
703 	}
704 out:
705 	if (rc) {
706 		if (so4 != NULL)
707 			soclose(so4);
708 		if (so6 != NULL)
709 			soclose(so6);
710 	}
711 	return (rc);
712 }
713 
714 static int wg_socket_set_sockopt(struct socket *so4, struct socket *so6, int name, void *val, size_t len)
715 {
716 	int ret4 = 0, ret6 = 0;
717 	struct sockopt sopt = {
718 		.sopt_dir = SOPT_SET,
719 		.sopt_level = SOL_SOCKET,
720 		.sopt_name = name,
721 		.sopt_val = val,
722 		.sopt_valsize = len
723 	};
724 
725 	if (so4)
726 		ret4 = sosetopt(so4, &sopt);
727 	if (so6)
728 		ret6 = sosetopt(so6, &sopt);
729 	return (ret4 ?: ret6);
730 }
731 
732 static int wg_socket_set_cookie(struct wg_softc *sc, uint32_t user_cookie)
733 {
734 	struct wg_socket *so = &sc->sc_socket;
735 	int ret;
736 
737 	sx_assert(&sc->sc_lock, SX_XLOCKED);
738 	ret = wg_socket_set_sockopt(so->so_so4, so->so_so6, SO_USER_COOKIE, &user_cookie, sizeof(user_cookie));
739 	if (!ret)
740 		so->so_user_cookie = user_cookie;
741 	return (ret);
742 }
743 
744 static int wg_socket_set_fibnum(struct wg_softc *sc, int fibnum)
745 {
746 	struct wg_socket *so = &sc->sc_socket;
747 	int ret;
748 
749 	sx_assert(&sc->sc_lock, SX_XLOCKED);
750 
751 	ret = wg_socket_set_sockopt(so->so_so4, so->so_so6, SO_SETFIB, &fibnum, sizeof(fibnum));
752 	if (!ret)
753 		so->so_fibnum = fibnum;
754 	return (ret);
755 }
756 
757 static void
758 wg_socket_uninit(struct wg_softc *sc)
759 {
760 	wg_socket_set(sc, NULL, NULL);
761 }
762 
763 static void
764 wg_socket_set(struct wg_softc *sc, struct socket *new_so4, struct socket *new_so6)
765 {
766 	struct wg_socket *so = &sc->sc_socket;
767 	struct socket *so4, *so6;
768 
769 	sx_assert(&sc->sc_lock, SX_XLOCKED);
770 
771 	so4 = atomic_load_ptr(&so->so_so4);
772 	so6 = atomic_load_ptr(&so->so_so6);
773 	atomic_store_ptr(&so->so_so4, new_so4);
774 	atomic_store_ptr(&so->so_so6, new_so6);
775 
776 	if (!so4 && !so6)
777 		return;
778 	NET_EPOCH_WAIT();
779 	if (so4)
780 		soclose(so4);
781 	if (so6)
782 		soclose(so6);
783 }
784 
785 static int
786 wg_socket_bind(struct socket **in_so4, struct socket **in_so6, in_port_t *requested_port)
787 {
788 	struct socket *so4 = *in_so4, *so6 = *in_so6;
789 	int ret4 = 0, ret6 = 0;
790 	in_port_t port = *requested_port;
791 	struct sockaddr_in sin = {
792 		.sin_len = sizeof(struct sockaddr_in),
793 		.sin_family = AF_INET,
794 		.sin_port = htons(port)
795 	};
796 	struct sockaddr_in6 sin6 = {
797 		.sin6_len = sizeof(struct sockaddr_in6),
798 		.sin6_family = AF_INET6,
799 		.sin6_port = htons(port)
800 	};
801 
802 	if (so4) {
803 		ret4 = sobind(so4, (struct sockaddr *)&sin, curthread);
804 		if (ret4 && ret4 != EADDRNOTAVAIL)
805 			return (ret4);
806 		if (!ret4 && !sin.sin_port) {
807 			struct sockaddr_in *bound_sin;
808 			int ret = so4->so_proto->pr_sockaddr(so4,
809 			    (struct sockaddr **)&bound_sin);
810 			if (ret)
811 				return (ret);
812 			port = ntohs(bound_sin->sin_port);
813 			sin6.sin6_port = bound_sin->sin_port;
814 			free(bound_sin, M_SONAME);
815 		}
816 	}
817 
818 	if (so6) {
819 		ret6 = sobind(so6, (struct sockaddr *)&sin6, curthread);
820 		if (ret6 && ret6 != EADDRNOTAVAIL)
821 			return (ret6);
822 		if (!ret6 && !sin6.sin6_port) {
823 			struct sockaddr_in6 *bound_sin6;
824 			int ret = so6->so_proto->pr_sockaddr(so6,
825 			    (struct sockaddr **)&bound_sin6);
826 			if (ret)
827 				return (ret);
828 			port = ntohs(bound_sin6->sin6_port);
829 			free(bound_sin6, M_SONAME);
830 		}
831 	}
832 
833 	if (ret4 && ret6)
834 		return (ret4);
835 	*requested_port = port;
836 	if (ret4 && !ret6 && so4) {
837 		soclose(so4);
838 		*in_so4 = NULL;
839 	} else if (ret6 && !ret4 && so6) {
840 		soclose(so6);
841 		*in_so6 = NULL;
842 	}
843 	return (0);
844 }
845 
846 static int
847 wg_send(struct wg_softc *sc, struct wg_endpoint *e, struct mbuf *m)
848 {
849 	struct epoch_tracker et;
850 	struct sockaddr *sa;
851 	struct wg_socket *so = &sc->sc_socket;
852 	struct socket *so4, *so6;
853 	struct mbuf *control = NULL;
854 	int ret = 0;
855 	size_t len = m->m_pkthdr.len;
856 
857 	/* Get local control address before locking */
858 	if (e->e_remote.r_sa.sa_family == AF_INET) {
859 		if (e->e_local.l_in.s_addr != INADDR_ANY)
860 			control = sbcreatecontrol((caddr_t)&e->e_local.l_in,
861 			    sizeof(struct in_addr), IP_SENDSRCADDR,
862 			    IPPROTO_IP, M_NOWAIT);
863 #ifdef INET6
864 	} else if (e->e_remote.r_sa.sa_family == AF_INET6) {
865 		if (!IN6_IS_ADDR_UNSPECIFIED(&e->e_local.l_in6))
866 			control = sbcreatecontrol((caddr_t)&e->e_local.l_pktinfo6,
867 			    sizeof(struct in6_pktinfo), IPV6_PKTINFO,
868 			    IPPROTO_IPV6, M_NOWAIT);
869 #endif
870 	} else {
871 		m_freem(m);
872 		return (EAFNOSUPPORT);
873 	}
874 
875 	/* Get remote address */
876 	sa = &e->e_remote.r_sa;
877 
878 	NET_EPOCH_ENTER(et);
879 	so4 = atomic_load_ptr(&so->so_so4);
880 	so6 = atomic_load_ptr(&so->so_so6);
881 	if (e->e_remote.r_sa.sa_family == AF_INET && so4 != NULL)
882 		ret = sosend(so4, sa, NULL, m, control, 0, curthread);
883 	else if (e->e_remote.r_sa.sa_family == AF_INET6 && so6 != NULL)
884 		ret = sosend(so6, sa, NULL, m, control, 0, curthread);
885 	else {
886 		ret = ENOTCONN;
887 		m_freem(control);
888 		m_freem(m);
889 	}
890 	NET_EPOCH_EXIT(et);
891 	if (ret == 0) {
892 		if_inc_counter(sc->sc_ifp, IFCOUNTER_OPACKETS, 1);
893 		if_inc_counter(sc->sc_ifp, IFCOUNTER_OBYTES, len);
894 	}
895 	return (ret);
896 }
897 
898 static void
899 wg_send_buf(struct wg_softc *sc, struct wg_endpoint *e, uint8_t *buf, size_t len)
900 {
901 	struct mbuf	*m;
902 	int		 ret = 0;
903 	bool		 retried = false;
904 
905 retry:
906 	m = m_get2(len, M_NOWAIT, MT_DATA, M_PKTHDR);
907 	if (!m) {
908 		ret = ENOMEM;
909 		goto out;
910 	}
911 	m_copyback(m, 0, len, buf);
912 
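	/*
	 * On the first pass ret is 0; on the EADDRNOTAVAIL retry it still
	 * holds that error, so the else branch sends exactly once more
	 * without arming another retry.
	 */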
913 	if (ret == 0) {
914 		ret = wg_send(sc, e, m);
915 		/* Retry if we couldn't bind to e->e_local */
916 		if (ret == EADDRNOTAVAIL && !retried) {
917 			bzero(&e->e_local, sizeof(e->e_local));
918 			retried = true;
919 			goto retry;
920 		}
921 	} else {
922 		ret = wg_send(sc, e, m);
923 	}
924 out:
925 	if (ret)
926 		DPRINTF(sc, "Unable to send packet: %d\n", ret);
927 }
928 
929 /* Timers */
930 static void
931 wg_timers_enable(struct wg_peer *peer)
932 {
933 	atomic_store_bool(&peer->p_enabled, true);
934 	wg_timers_run_persistent_keepalive(peer);
935 }
936 
937 static void
938 wg_timers_disable(struct wg_peer *peer)
939 {
940 	/* By setting p_enabled = false, then calling NET_EPOCH_WAIT, we can be
941 	 * sure no new handshakes are created after the wait. This is because
942 	 * all callout_resets (scheduling the callout) are guarded by
943 	 * p_enabled. We can be sure all sections that read p_enabled and then
944 	 * optionally call callout_reset are finished as they are surrounded by
945 	 * NET_EPOCH_{ENTER,EXIT}.
946 	 *
947 	 * However, as new callouts may be scheduled during NET_EPOCH_WAIT (but
948 	 * not after), we stop all callouts leaving no callouts active.
949 	 *
950 	 * We should also pull NET_EPOCH_WAIT out of the FOREACH(peer) loops, but the
951 	 * performance impact is acceptable for the time being. */
952 	atomic_store_bool(&peer->p_enabled, false);
953 	NET_EPOCH_WAIT();
954 	atomic_store_bool(&peer->p_need_another_keepalive, false);
955 
956 	callout_stop(&peer->p_new_handshake);
957 	callout_stop(&peer->p_send_keepalive);
958 	callout_stop(&peer->p_retry_handshake);
959 	callout_stop(&peer->p_persistent_keepalive);
960 	callout_stop(&peer->p_zero_key_material);
961 }
962 
963 static void
964 wg_timers_set_persistent_keepalive(struct wg_peer *peer, uint16_t interval)
965 {
966 	struct epoch_tracker et;
967 	if (interval != peer->p_persistent_keepalive_interval) {
968 		atomic_store_16(&peer->p_persistent_keepalive_interval, interval);
969 		NET_EPOCH_ENTER(et);
970 		if (atomic_load_bool(&peer->p_enabled))
971 			wg_timers_run_persistent_keepalive(peer);
972 		NET_EPOCH_EXIT(et);
973 	}
974 }
975 
976 static void
977 wg_timers_get_last_handshake(struct wg_peer *peer, struct wg_timespec64 *time)
978 {
979 	mtx_lock(&peer->p_handshake_mtx);
980 	time->tv_sec = peer->p_handshake_complete.tv_sec;
981 	time->tv_nsec = peer->p_handshake_complete.tv_nsec;
982 	mtx_unlock(&peer->p_handshake_mtx);
983 }
984 
985 static void
986 wg_timers_event_data_sent(struct wg_peer *peer)
987 {
988 	struct epoch_tracker et;
989 	NET_EPOCH_ENTER(et);
990 	if (atomic_load_bool(&peer->p_enabled) &&
991 	    !callout_pending(&peer->p_new_handshake))
992 		callout_reset(&peer->p_new_handshake, MSEC_2_TICKS(
993 		    NEW_HANDSHAKE_TIMEOUT * 1000 +
994 		    arc4random_uniform(REKEY_TIMEOUT_JITTER)),
995 		    wg_timers_run_new_handshake, peer);
996 	NET_EPOCH_EXIT(et);
997 }
998 
999 static void
1000 wg_timers_event_data_received(struct wg_peer *peer)
1001 {
1002 	struct epoch_tracker et;
1003 	NET_EPOCH_ENTER(et);
1004 	if (atomic_load_bool(&peer->p_enabled)) {
1005 		if (!callout_pending(&peer->p_send_keepalive))
1006 			callout_reset(&peer->p_send_keepalive,
1007 			    MSEC_2_TICKS(KEEPALIVE_TIMEOUT * 1000),
1008 			    wg_timers_run_send_keepalive, peer);
1009 		else
1010 			atomic_store_bool(&peer->p_need_another_keepalive,
1011 			    true);
1012 	}
1013 	NET_EPOCH_EXIT(et);
1014 }
1015 
1016 static void
1017 wg_timers_event_any_authenticated_packet_sent(struct wg_peer *peer)
1018 {
1019 	callout_stop(&peer->p_send_keepalive);
1020 }
1021 
1022 static void
1023 wg_timers_event_any_authenticated_packet_received(struct wg_peer *peer)
1024 {
1025 	callout_stop(&peer->p_new_handshake);
1026 }
1027 
1028 static void
1029 wg_timers_event_any_authenticated_packet_traversal(struct wg_peer *peer)
1030 {
1031 	struct epoch_tracker et;
1032 	uint16_t interval;
1033 	NET_EPOCH_ENTER(et);
1034 	interval = atomic_load_16(&peer->p_persistent_keepalive_interval);
1035 	if (atomic_load_bool(&peer->p_enabled) && interval > 0)
1036 		callout_reset(&peer->p_persistent_keepalive,
1037 		     MSEC_2_TICKS(interval * 1000),
1038 		     wg_timers_run_persistent_keepalive, peer);
1039 	NET_EPOCH_EXIT(et);
1040 }
1041 
1042 static void
1043 wg_timers_event_handshake_initiated(struct wg_peer *peer)
1044 {
1045 	struct epoch_tracker et;
1046 	NET_EPOCH_ENTER(et);
1047 	if (atomic_load_bool(&peer->p_enabled))
1048 		callout_reset(&peer->p_retry_handshake, MSEC_2_TICKS(
1049 		    REKEY_TIMEOUT * 1000 +
1050 		    arc4random_uniform(REKEY_TIMEOUT_JITTER)),
1051 		    wg_timers_run_retry_handshake, peer);
1052 	NET_EPOCH_EXIT(et);
1053 }
1054 
1055 static void
1056 wg_timers_event_handshake_complete(struct wg_peer *peer)
1057 {
1058 	struct epoch_tracker et;
1059 	NET_EPOCH_ENTER(et);
1060 	if (atomic_load_bool(&peer->p_enabled)) {
1061 		mtx_lock(&peer->p_handshake_mtx);
1062 		callout_stop(&peer->p_retry_handshake);
1063 		peer->p_handshake_retries = 0;
1064 		getnanotime(&peer->p_handshake_complete);
1065 		mtx_unlock(&peer->p_handshake_mtx);
1066 		wg_timers_run_send_keepalive(peer);
1067 	}
1068 	NET_EPOCH_EXIT(et);
1069 }
1070 
1071 static void
1072 wg_timers_event_session_derived(struct wg_peer *peer)
1073 {
1074 	struct epoch_tracker et;
1075 	NET_EPOCH_ENTER(et);
1076 	if (atomic_load_bool(&peer->p_enabled))
1077 		callout_reset(&peer->p_zero_key_material,
1078 		    MSEC_2_TICKS(REJECT_AFTER_TIME * 3 * 1000),
1079 		    wg_timers_run_zero_key_material, peer);
1080 	NET_EPOCH_EXIT(et);
1081 }
1082 
1083 static void
1084 wg_timers_event_want_initiation(struct wg_peer *peer)
1085 {
1086 	struct epoch_tracker et;
1087 	NET_EPOCH_ENTER(et);
1088 	if (atomic_load_bool(&peer->p_enabled))
1089 		wg_timers_run_send_initiation(peer, false);
1090 	NET_EPOCH_EXIT(et);
1091 }
1092 
1093 static void
1094 wg_timers_run_send_initiation(struct wg_peer *peer, bool is_retry)
1095 {
1096 	if (!is_retry)
1097 		peer->p_handshake_retries = 0;
1098 	if (noise_remote_initiation_expired(peer->p_remote) == ETIMEDOUT)
1099 		wg_send_initiation(peer);
1100 }
1101 
1102 static void
1103 wg_timers_run_retry_handshake(void *_peer)
1104 {
1105 	struct epoch_tracker et;
1106 	struct wg_peer *peer = _peer;
1107 
1108 	mtx_lock(&peer->p_handshake_mtx);
1109 	if (peer->p_handshake_retries <= MAX_TIMER_HANDSHAKES) {
1110 		peer->p_handshake_retries++;
1111 		mtx_unlock(&peer->p_handshake_mtx);
1112 
1113 		DPRINTF(peer->p_sc, "Handshake for peer %" PRIu64 " did not complete "
1114 		    "after %d seconds, retrying (try %d)\n", peer->p_id,
1115 		    REKEY_TIMEOUT, peer->p_handshake_retries + 1);
1116 		wg_peer_clear_src(peer);
1117 		wg_timers_run_send_initiation(peer, true);
1118 	} else {
1119 		mtx_unlock(&peer->p_handshake_mtx);
1120 
1121 		DPRINTF(peer->p_sc, "Handshake for peer %" PRIu64 " did not complete "
1122 		    "after %d retries, giving up\n", peer->p_id,
1123 		    MAX_TIMER_HANDSHAKES + 2);
1124 
1125 		callout_stop(&peer->p_send_keepalive);
1126 		wg_queue_purge(&peer->p_stage_queue);
1127 		NET_EPOCH_ENTER(et);
1128 		if (atomic_load_bool(&peer->p_enabled) &&
1129 		    !callout_pending(&peer->p_zero_key_material))
1130 			callout_reset(&peer->p_zero_key_material,
1131 			    MSEC_2_TICKS(REJECT_AFTER_TIME * 3 * 1000),
1132 			    wg_timers_run_zero_key_material, peer);
1133 		NET_EPOCH_EXIT(et);
1134 	}
1135 }
1136 
1137 static void
1138 wg_timers_run_send_keepalive(void *_peer)
1139 {
1140 	struct epoch_tracker et;
1141 	struct wg_peer *peer = _peer;
1142 
1143 	wg_send_keepalive(peer);
1144 	NET_EPOCH_ENTER(et);
1145 	if (atomic_load_bool(&peer->p_enabled) &&
1146 	    atomic_load_bool(&peer->p_need_another_keepalive)) {
1147 		atomic_store_bool(&peer->p_need_another_keepalive, false);
1148 		callout_reset(&peer->p_send_keepalive,
1149 		    MSEC_2_TICKS(KEEPALIVE_TIMEOUT * 1000),
1150 		    wg_timers_run_send_keepalive, peer);
1151 	}
1152 	NET_EPOCH_EXIT(et);
1153 }
1154 
1155 static void
1156 wg_timers_run_new_handshake(void *_peer)
1157 {
1158 	struct wg_peer *peer = _peer;
1159 
1160 	DPRINTF(peer->p_sc, "Retrying handshake with peer %" PRIu64 " because we "
1161 	    "stopped hearing back after %d seconds\n",
1162 	    peer->p_id, NEW_HANDSHAKE_TIMEOUT);
1163 
1164 	wg_peer_clear_src(peer);
1165 	wg_timers_run_send_initiation(peer, false);
1166 }
1167 
1168 static void
1169 wg_timers_run_zero_key_material(void *_peer)
1170 {
1171 	struct wg_peer *peer = _peer;
1172 
1173 	DPRINTF(peer->p_sc, "Zeroing out keys for peer %" PRIu64 ", since we "
1174 	    "haven't received a new one in %d seconds\n",
1175 	    peer->p_id, REJECT_AFTER_TIME * 3);
1176 	noise_remote_keypairs_clear(peer->p_remote);
1177 }
1178 
1179 static void
1180 wg_timers_run_persistent_keepalive(void *_peer)
1181 {
1182 	struct wg_peer *peer = _peer;
1183 
1184 	if (atomic_load_16(&peer->p_persistent_keepalive_interval) > 0)
1185 		wg_send_keepalive(peer);
1186 }
1187 
1188 /* TODO Handshake */
1189 static void
1190 wg_peer_send_buf(struct wg_peer *peer, uint8_t *buf, size_t len)
1191 {
1192 	struct wg_endpoint endpoint;
1193 
1194 	counter_u64_add(peer->p_tx_bytes, len);
1195 	wg_timers_event_any_authenticated_packet_traversal(peer);
1196 	wg_timers_event_any_authenticated_packet_sent(peer);
1197 	wg_peer_get_endpoint(peer, &endpoint);
1198 	wg_send_buf(peer->p_sc, &endpoint, buf, len);
1199 }
1200 
1201 static void
1202 wg_send_initiation(struct wg_peer *peer)
1203 {
1204 	struct wg_pkt_initiation pkt;
1205 
1206 	if (noise_create_initiation(peer->p_remote, &pkt.s_idx, pkt.ue,
1207 	    pkt.es, pkt.ets) != 0)
1208 		return;
1209 
1210 	DPRINTF(peer->p_sc, "Sending handshake initiation to peer %" PRIu64 "\n", peer->p_id);
1211 
1212 	pkt.t = WG_PKT_INITIATION;
1213 	cookie_maker_mac(&peer->p_cookie, &pkt.m, &pkt,
1214 	    sizeof(pkt) - sizeof(pkt.m));
1215 	wg_peer_send_buf(peer, (uint8_t *)&pkt, sizeof(pkt));
1216 	wg_timers_event_handshake_initiated(peer);
1217 }
1218 
1219 static void
1220 wg_send_response(struct wg_peer *peer)
1221 {
1222 	struct wg_pkt_response pkt;
1223 
1224 	if (noise_create_response(peer->p_remote, &pkt.s_idx, &pkt.r_idx,
1225 	    pkt.ue, pkt.en) != 0)
1226 		return;
1227 
1228 	DPRINTF(peer->p_sc, "Sending handshake response to peer %" PRIu64 "\n", peer->p_id);
1229 
1230 	wg_timers_event_session_derived(peer);
1231 	pkt.t = WG_PKT_RESPONSE;
1232 	cookie_maker_mac(&peer->p_cookie, &pkt.m, &pkt,
1233 	     sizeof(pkt)-sizeof(pkt.m));
1234 	wg_peer_send_buf(peer, (uint8_t*)&pkt, sizeof(pkt));
1235 }
1236 
1237 static void
1238 wg_send_cookie(struct wg_softc *sc, struct cookie_macs *cm, uint32_t idx,
1239     struct wg_endpoint *e)
1240 {
1241 	struct wg_pkt_cookie	pkt;
1242 
1243 	DPRINTF(sc, "Sending cookie response for denied handshake message\n");
1244 
1245 	pkt.t = WG_PKT_COOKIE;
1246 	pkt.r_idx = idx;
1247 
1248 	cookie_checker_create_payload(&sc->sc_cookie, cm, pkt.nonce,
1249 	    pkt.ec, &e->e_remote.r_sa);
1250 	wg_send_buf(sc, e, (uint8_t *)&pkt, sizeof(pkt));
1251 }
1252 
1253 static void
1254 wg_send_keepalive(struct wg_peer *peer)
1255 {
1256 	struct wg_packet *pkt;
1257 	struct mbuf *m;
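	/*
	 * If traffic is already staged, just flush it; otherwise stage an
	 * empty mbuf, which encrypts to a zero-length data packet that the
	 * peer treats as a keepalive.
	 */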
1258 
1259 	if (wg_queue_len(&peer->p_stage_queue) > 0)
1260 		goto send;
1261 	if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
1262 		return;
1263 	if ((pkt = wg_packet_alloc(m)) == NULL) {
1264 		m_freem(m);
1265 		return;
1266 	}
1267 	wg_queue_push_staged(&peer->p_stage_queue, pkt);
1268 	DPRINTF(peer->p_sc, "Sending keepalive packet to peer %" PRIu64 "\n", peer->p_id);
1269 send:
1270 	wg_peer_send_staged(peer);
1271 }
1272 
1273 static void
1274 wg_handshake(struct wg_softc *sc, struct wg_packet *pkt)
1275 {
1276 	struct wg_pkt_initiation	*init;
1277 	struct wg_pkt_response		*resp;
1278 	struct wg_pkt_cookie		*cook;
1279 	struct wg_endpoint		*e;
1280 	struct wg_peer			*peer;
1281 	struct mbuf			*m;
1282 	struct noise_remote		*remote = NULL;
1283 	int				 res;
1284 	bool				 underload = false;
1285 	static sbintime_t		 wg_last_underload; /* sbinuptime */
1286 
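	/*
	 * Consider the handshake queue "under load" once it is 1/8 full, and
	 * keep that state for UNDERLOAD_TIMEOUT seconds after it drains;
	 * cookie_checker_validate_macs() uses the flag to decide whether to
	 * demand a cookie round trip before doing the expensive handshake work.
	 */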
1287 	underload = wg_queue_len(&sc->sc_handshake_queue) >= MAX_QUEUED_HANDSHAKES / 8;
1288 	if (underload) {
1289 		wg_last_underload = getsbinuptime();
1290 	} else if (wg_last_underload) {
1291 		underload = wg_last_underload + UNDERLOAD_TIMEOUT * SBT_1S > getsbinuptime();
1292 		if (!underload)
1293 			wg_last_underload = 0;
1294 	}
1295 
1296 	m = pkt->p_mbuf;
1297 	e = &pkt->p_endpoint;
1298 
1299 	if ((pkt->p_mbuf = m = m_pullup(m, m->m_pkthdr.len)) == NULL)
1300 		goto error;
1301 
1302 	switch (*mtod(m, uint32_t *)) {
1303 	case WG_PKT_INITIATION:
1304 		init = mtod(m, struct wg_pkt_initiation *);
1305 
1306 		res = cookie_checker_validate_macs(&sc->sc_cookie, &init->m,
1307 				init, sizeof(*init) - sizeof(init->m),
1308 				underload, &e->e_remote.r_sa,
1309 				sc->sc_ifp->if_vnet);
1310 
1311 		if (res == EINVAL) {
1312 			DPRINTF(sc, "Invalid initiation MAC\n");
1313 			goto error;
1314 		} else if (res == ECONNREFUSED) {
1315 			DPRINTF(sc, "Handshake ratelimited\n");
1316 			goto error;
1317 		} else if (res == EAGAIN) {
1318 			wg_send_cookie(sc, &init->m, init->s_idx, e);
1319 			goto error;
1320 		} else if (res != 0) {
1321 			panic("unexpected response: %d\n", res);
1322 		}
1323 
1324 		if (noise_consume_initiation(sc->sc_local, &remote,
1325 		    init->s_idx, init->ue, init->es, init->ets) != 0) {
1326 			DPRINTF(sc, "Invalid handshake initiation\n");
1327 			goto error;
1328 		}
1329 
1330 		peer = noise_remote_arg(remote);
1331 
1332 		DPRINTF(sc, "Receiving handshake initiation from peer %" PRIu64 "\n", peer->p_id);
1333 
1334 		wg_peer_set_endpoint(peer, e);
1335 		wg_send_response(peer);
1336 		break;
1337 	case WG_PKT_RESPONSE:
1338 		resp = mtod(m, struct wg_pkt_response *);
1339 
1340 		res = cookie_checker_validate_macs(&sc->sc_cookie, &resp->m,
1341 				resp, sizeof(*resp) - sizeof(resp->m),
1342 				underload, &e->e_remote.r_sa,
1343 				sc->sc_ifp->if_vnet);
1344 
1345 		if (res == EINVAL) {
1346 			DPRINTF(sc, "Invalid response MAC\n");
1347 			goto error;
1348 		} else if (res == ECONNREFUSED) {
1349 			DPRINTF(sc, "Handshake ratelimited\n");
1350 			goto error;
1351 		} else if (res == EAGAIN) {
1352 			wg_send_cookie(sc, &resp->m, resp->s_idx, e);
1353 			goto error;
1354 		} else if (res != 0) {
1355 			panic("unexpected response: %d\n", res);
1356 		}
1357 
1358 		if (noise_consume_response(sc->sc_local, &remote,
1359 		    resp->s_idx, resp->r_idx, resp->ue, resp->en) != 0) {
1360 			DPRINTF(sc, "Invalid handshake response\n");
1361 			goto error;
1362 		}
1363 
1364 		peer = noise_remote_arg(remote);
1365 		DPRINTF(sc, "Receiving handshake response from peer %" PRIu64 "\n", peer->p_id);
1366 
1367 		wg_peer_set_endpoint(peer, e);
1368 		wg_timers_event_session_derived(peer);
1369 		wg_timers_event_handshake_complete(peer);
1370 		break;
1371 	case WG_PKT_COOKIE:
1372 		cook = mtod(m, struct wg_pkt_cookie *);
1373 
1374 		if ((remote = noise_remote_index(sc->sc_local, cook->r_idx)) == NULL) {
1375 			DPRINTF(sc, "Unknown cookie index\n");
1376 			goto error;
1377 		}
1378 
1379 		peer = noise_remote_arg(remote);
1380 
1381 		if (cookie_maker_consume_payload(&peer->p_cookie,
1382 		    cook->nonce, cook->ec) == 0) {
1383 			DPRINTF(sc, "Receiving cookie response\n");
1384 		} else {
1385 			DPRINTF(sc, "Could not decrypt cookie response\n");
1386 			goto error;
1387 		}
1388 
1389 		goto not_authenticated;
1390 	default:
1391 		panic("invalid packet in handshake queue");
1392 	}
1393 
1394 	wg_timers_event_any_authenticated_packet_received(peer);
1395 	wg_timers_event_any_authenticated_packet_traversal(peer);
1396 
1397 not_authenticated:
1398 	counter_u64_add(peer->p_rx_bytes, m->m_pkthdr.len);
1399 	if_inc_counter(sc->sc_ifp, IFCOUNTER_IPACKETS, 1);
1400 	if_inc_counter(sc->sc_ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
1401 error:
1402 	if (remote != NULL)
1403 		noise_remote_put(remote);
1404 	wg_packet_free(pkt);
1405 }
1406 
1407 static void
1408 wg_softc_handshake_receive(struct wg_softc *sc)
1409 {
1410 	struct wg_packet *pkt;
1411 	while ((pkt = wg_queue_dequeue_handshake(&sc->sc_handshake_queue)) != NULL)
1412 		wg_handshake(sc, pkt);
1413 }
1414 
1415 static void
1416 wg_mbuf_reset(struct mbuf *m)
1417 {
1418 
1419 	struct m_tag *t, *tmp;
1420 
1421 	/*
1422 	 * We want to reset the mbuf to a newly allocated state, containing
1423 	 * just the packet contents. Unfortunately FreeBSD doesn't seem to
1424 	 * offer this anywhere, so we have to make it up as we go. If we can
1425 	 * get this in kern/kern_mbuf.c, that would be best.
1426 	 *
1427 	 * Notice: this may break things unexpectedly but it is better to fail
1428 	 *         closed in the extreme case than to leak information in every
1429 	 *         case.
1430 	 *
1431 	 * With that said, all this attempts to do is remove any extraneous
1432 	 * information that could be present.
1433 	 */
1434 
1435 	M_ASSERTPKTHDR(m);
1436 
1437 	m->m_flags &= ~(M_BCAST|M_MCAST|M_VLANTAG|M_PROMISC|M_PROTOFLAGS);
1438 
1439 	M_HASHTYPE_CLEAR(m);
1440 #ifdef NUMA
1441 	m->m_pkthdr.numa_domain = M_NODOM;
1442 #endif
1443 	SLIST_FOREACH_SAFE(t, &m->m_pkthdr.tags, m_tag_link, tmp) {
1444 		if ((t->m_tag_id != 0 || t->m_tag_cookie != MTAG_WGLOOP) &&
1445 		    t->m_tag_id != PACKET_TAG_MACLABEL)
1446 			m_tag_delete(m, t);
1447 	}
1448 
1449 	KASSERT((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0,
1450 	    ("%s: mbuf %p has a send tag", __func__, m));
1451 
1452 	m->m_pkthdr.csum_flags = 0;
1453 	m->m_pkthdr.PH_per.sixtyfour[0] = 0;
1454 	m->m_pkthdr.PH_loc.sixtyfour[0] = 0;
1455 }
1456 
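/*
 * Bytes of zero padding needed to round the plaintext up to a multiple of
 * WG_PKT_PADDING without exceeding the MTU; a zero p_mtu (e.g. a keepalive
 * staged without one) only rounds up to the 16-byte boundary.
 */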
1457 static inline unsigned int
1458 calculate_padding(struct wg_packet *pkt)
1459 {
1460 	unsigned int padded_size, last_unit = pkt->p_mbuf->m_pkthdr.len;
1461 
1462 	if (__predict_false(!pkt->p_mtu))
1463 		return (last_unit + (WG_PKT_PADDING - 1)) & ~(WG_PKT_PADDING - 1);
1464 
1465 	if (__predict_false(last_unit > pkt->p_mtu))
1466 		last_unit %= pkt->p_mtu;
1467 
1468 	padded_size = (last_unit + (WG_PKT_PADDING - 1)) & ~(WG_PKT_PADDING - 1);
1469 	if (pkt->p_mtu < padded_size)
1470 		padded_size = pkt->p_mtu;
1471 	return padded_size - last_unit;
1472 }
1473 
1474 static void
1475 wg_encrypt(struct wg_softc *sc, struct wg_packet *pkt)
1476 {
1477 	static const uint8_t	 padding[WG_PKT_PADDING] = { 0 };
1478 	struct wg_pkt_data	*data;
1479 	struct wg_peer		*peer;
1480 	struct noise_remote	*remote;
1481 	struct mbuf		*m;
1482 	uint32_t		 idx;
1483 	unsigned int		 padlen;
1484 	enum wg_ring_state	 state = WG_PACKET_DEAD;
1485 
1486 	remote = noise_keypair_remote(pkt->p_keypair);
1487 	peer = noise_remote_arg(remote);
1488 	m = pkt->p_mbuf;
1489 
1490 	/* Pad the packet */
1491 	padlen = calculate_padding(pkt);
1492 	if (padlen != 0 && !m_append(m, padlen, padding))
1493 		goto out;
1494 
1495 	/* Do encryption */
1496 	if (noise_keypair_encrypt(pkt->p_keypair, &idx, pkt->p_nonce, m) != 0)
1497 		goto out;
1498 
1499 	/* Put header into packet */
1500 	M_PREPEND(m, sizeof(struct wg_pkt_data), M_NOWAIT);
1501 	if (m == NULL)
1502 		goto out;
1503 	data = mtod(m, struct wg_pkt_data *);
1504 	data->t = WG_PKT_DATA;
1505 	data->r_idx = idx;
1506 	data->nonce = htole64(pkt->p_nonce);
1507 
1508 	wg_mbuf_reset(m);
1509 	state = WG_PACKET_CRYPTED;
1510 out:
1511 	pkt->p_mbuf = m;
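	/* Publish the result: order the p_mbuf store before the p_state store
	 * that wg_deliver_out() checks when draining the serial queue. */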
1512 	wmb();
1513 	pkt->p_state = state;
1514 	GROUPTASK_ENQUEUE(&peer->p_send);
1515 	noise_remote_put(remote);
1516 }
1517 
1518 static void
1519 wg_decrypt(struct wg_softc *sc, struct wg_packet *pkt)
1520 {
1521 	struct wg_peer		*peer, *allowed_peer;
1522 	struct noise_remote	*remote;
1523 	struct mbuf		*m;
1524 	int			 len;
1525 	enum wg_ring_state	 state = WG_PACKET_DEAD;
1526 
1527 	remote = noise_keypair_remote(pkt->p_keypair);
1528 	peer = noise_remote_arg(remote);
1529 	m = pkt->p_mbuf;
1530 
1531 	/* Read nonce and then adjust to remove the header. */
1532 	pkt->p_nonce = le64toh(mtod(m, struct wg_pkt_data *)->nonce);
1533 	m_adj(m, sizeof(struct wg_pkt_data));
1534 
1535 	if (noise_keypair_decrypt(pkt->p_keypair, pkt->p_nonce, m) != 0)
1536 		goto out;
1537 
1538 	/* A packet with length 0 is a keepalive packet */
1539 	if (__predict_false(m->m_pkthdr.len == 0)) {
1540 		DPRINTF(sc, "Receiving keepalive packet from peer "
1541 		    "%" PRIu64 "\n", peer->p_id);
1542 		state = WG_PACKET_CRYPTED;
1543 		goto out;
1544 	}
1545 
1546 	/*
1547 	 * We can let the network stack handle the intricate validation of the
1548 	 * IP header; we just worry about the size and the version, so we can
1549 	 * read the source address in wg_aip_lookup.
1550 	 */
1551 
1552 	if (determine_af_and_pullup(&m, &pkt->p_af) == 0) {
1553 		if (pkt->p_af == AF_INET) {
1554 			struct ip *ip = mtod(m, struct ip *);
1555 			allowed_peer = wg_aip_lookup(sc, AF_INET, &ip->ip_src);
1556 			len = ntohs(ip->ip_len);
1557 			if (len >= sizeof(struct ip) && len < m->m_pkthdr.len)
1558 				m_adj(m, len - m->m_pkthdr.len);
1559 		} else if (pkt->p_af == AF_INET6) {
1560 			struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
1561 			allowed_peer = wg_aip_lookup(sc, AF_INET6, &ip6->ip6_src);
1562 			len = ntohs(ip6->ip6_plen) + sizeof(struct ip6_hdr);
1563 			if (len < m->m_pkthdr.len)
1564 				m_adj(m, len - m->m_pkthdr.len);
1565 		} else
1566 			panic("determine_af_and_pullup returned unexpected value");
1567 	} else {
1568 		DPRINTF(sc, "Packet is neither ipv4 nor ipv6 from peer %" PRIu64 "\n", peer->p_id);
1569 		goto out;
1570 	}
1571 
1572 	/* We only want to compare the address, not dereference it, so drop the ref. */
1573 	if (allowed_peer != NULL)
1574 		noise_remote_put(allowed_peer->p_remote);
1575 
1576 	if (__predict_false(peer != allowed_peer)) {
1577 		DPRINTF(sc, "Packet has unallowed src IP from peer %" PRIu64 "\n", peer->p_id);
1578 		goto out;
1579 	}
1580 
1581 	wg_mbuf_reset(m);
1582 	state = WG_PACKET_CRYPTED;
1583 out:
1584 	pkt->p_mbuf = m;
1585 	wmb();
1586 	pkt->p_state = state;
1587 	GROUPTASK_ENQUEUE(&peer->p_recv);
1588 	noise_remote_put(remote);
1589 }
1590 
1591 static void
1592 wg_softc_decrypt(struct wg_softc *sc)
1593 {
1594 	struct wg_packet *pkt;
1595 
1596 	while ((pkt = wg_queue_dequeue_parallel(&sc->sc_decrypt_parallel)) != NULL)
1597 		wg_decrypt(sc, pkt);
1598 }
1599 
1600 static void
1601 wg_softc_encrypt(struct wg_softc *sc)
1602 {
1603 	struct wg_packet *pkt;
1604 
1605 	while ((pkt = wg_queue_dequeue_parallel(&sc->sc_encrypt_parallel)) != NULL)
1606 		wg_encrypt(sc, pkt);
1607 }
1608 
1609 static void
1610 wg_encrypt_dispatch(struct wg_softc *sc)
1611 {
1612 	/*
1613 	 * The update to encrypt_last_cpu is racy such that we may
1614 	 * reschedule the task for the same CPU multiple times, but
1615 	 * the race doesn't really matter.
1616 	 */
1617 	u_int cpu = (sc->sc_encrypt_last_cpu + 1) % mp_ncpus;
1618 	sc->sc_encrypt_last_cpu = cpu;
1619 	GROUPTASK_ENQUEUE(&sc->sc_encrypt[cpu]);
1620 }
1621 
1622 static void
1623 wg_decrypt_dispatch(struct wg_softc *sc)
1624 {
1625 	u_int cpu = (sc->sc_decrypt_last_cpu + 1) % mp_ncpus;
1626 	sc->sc_decrypt_last_cpu = cpu;
1627 	GROUPTASK_ENQUEUE(&sc->sc_decrypt[cpu]);
1628 }
1629 
1630 static void
1631 wg_deliver_out(struct wg_peer *peer)
1632 {
1633 	struct wg_endpoint	 endpoint;
1634 	struct wg_softc		*sc = peer->p_sc;
1635 	struct wg_packet	*pkt;
1636 	struct mbuf		*m;
1637 	int			 rc, len;
1638 
1639 	wg_peer_get_endpoint(peer, &endpoint);
1640 
1641 	while ((pkt = wg_queue_dequeue_serial(&peer->p_encrypt_serial)) != NULL) {
1642 		if (pkt->p_state != WG_PACKET_CRYPTED)
1643 			goto error;
1644 
1645 		m = pkt->p_mbuf;
1646 		pkt->p_mbuf = NULL;
1647 
1648 		len = m->m_pkthdr.len;
1649 
1650 		wg_timers_event_any_authenticated_packet_traversal(peer);
1651 		wg_timers_event_any_authenticated_packet_sent(peer);
1652 		rc = wg_send(sc, &endpoint, m);
1653 		if (rc == 0) {
1654 			if (len > (sizeof(struct wg_pkt_data) + NOISE_AUTHTAG_LEN))
1655 				wg_timers_event_data_sent(peer);
1656 			counter_u64_add(peer->p_tx_bytes, len);
1657 		} else if (rc == EADDRNOTAVAIL) {
1658 			wg_peer_clear_src(peer);
1659 			wg_peer_get_endpoint(peer, &endpoint);
1660 			goto error;
1661 		} else {
1662 			goto error;
1663 		}
1664 		wg_packet_free(pkt);
1665 		if (noise_keep_key_fresh_send(peer->p_remote))
1666 			wg_timers_event_want_initiation(peer);
1667 		continue;
1668 error:
1669 		if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1);
1670 		wg_packet_free(pkt);
1671 	}
1672 }
1673 
1674 static void
1675 wg_deliver_in(struct wg_peer *peer)
1676 {
1677 	struct wg_softc		*sc = peer->p_sc;
1678 	struct ifnet		*ifp = sc->sc_ifp;
1679 	struct wg_packet	*pkt;
1680 	struct mbuf		*m;
1681 	struct epoch_tracker	 et;
1682 
1683 	while ((pkt = wg_queue_dequeue_serial(&peer->p_decrypt_serial)) != NULL) {
1684 		if (pkt->p_state != WG_PACKET_CRYPTED)
1685 			goto error;
1686 
1687 		m = pkt->p_mbuf;
1688 		if (noise_keypair_nonce_check(pkt->p_keypair, pkt->p_nonce) != 0)
1689 			goto error;
1690 
1691 		if (noise_keypair_received_with(pkt->p_keypair) == ECONNRESET)
1692 			wg_timers_event_handshake_complete(peer);
1693 
1694 		wg_timers_event_any_authenticated_packet_received(peer);
1695 		wg_timers_event_any_authenticated_packet_traversal(peer);
1696 		wg_peer_set_endpoint(peer, &pkt->p_endpoint);
1697 
1698 		counter_u64_add(peer->p_rx_bytes, m->m_pkthdr.len +
1699 		    sizeof(struct wg_pkt_data) + NOISE_AUTHTAG_LEN);
1700 		if_inc_counter(sc->sc_ifp, IFCOUNTER_IPACKETS, 1);
1701 		if_inc_counter(sc->sc_ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len +
1702 		    sizeof(struct wg_pkt_data) + NOISE_AUTHTAG_LEN);
1703 
1704 		if (m->m_pkthdr.len == 0)
1705 			goto done;
1706 
1707 		MPASS(pkt->p_af == AF_INET || pkt->p_af == AF_INET6);
1708 		pkt->p_mbuf = NULL;
1709 
1710 		m->m_pkthdr.rcvif = ifp;
1711 
1712 		NET_EPOCH_ENTER(et);
1713 		BPF_MTAP2_AF(ifp, m, pkt->p_af);
1714 
1715 		CURVNET_SET(ifp->if_vnet);
1716 		M_SETFIB(m, ifp->if_fib);
1717 		if (pkt->p_af == AF_INET)
1718 			netisr_dispatch(NETISR_IP, m);
1719 		if (pkt->p_af == AF_INET6)
1720 			netisr_dispatch(NETISR_IPV6, m);
1721 		CURVNET_RESTORE();
1722 		NET_EPOCH_EXIT(et);
1723 
1724 		wg_timers_event_data_received(peer);
1725 
1726 done:
1727 		if (noise_keep_key_fresh_recv(peer->p_remote))
1728 			wg_timers_event_want_initiation(peer);
1729 		wg_packet_free(pkt);
1730 		continue;
1731 error:
1732 		if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1733 		wg_packet_free(pkt);
1734 	}
1735 }
1736 
1737 static struct wg_packet *
1738 wg_packet_alloc(struct mbuf *m)
1739 {
1740 	struct wg_packet *pkt;
1741 
1742 	if ((pkt = uma_zalloc(wg_packet_zone, M_NOWAIT | M_ZERO)) == NULL)
1743 		return (NULL);
1744 	pkt->p_mbuf = m;
1745 	return (pkt);
1746 }
1747 
1748 static void
1749 wg_packet_free(struct wg_packet *pkt)
1750 {
1751 	if (pkt->p_keypair != NULL)
1752 		noise_keypair_put(pkt->p_keypair);
1753 	if (pkt->p_mbuf != NULL)
1754 		m_freem(pkt->p_mbuf);
1755 	uma_zfree(wg_packet_zone, pkt);
1756 }
1757 
1758 static void
1759 wg_queue_init(struct wg_queue *queue, const char *name)
1760 {
1761 	mtx_init(&queue->q_mtx, name, NULL, MTX_DEF);
1762 	STAILQ_INIT(&queue->q_queue);
1763 	queue->q_len = 0;
1764 }
1765 
1766 static void
1767 wg_queue_deinit(struct wg_queue *queue)
1768 {
1769 	wg_queue_purge(queue);
1770 	mtx_destroy(&queue->q_mtx);
1771 }
1772 
1773 static size_t
1774 wg_queue_len(struct wg_queue *queue)
1775 {
1776 	return (queue->q_len);
1777 }
1778 
1779 static int
1780 wg_queue_enqueue_handshake(struct wg_queue *hs, struct wg_packet *pkt)
1781 {
1782 	int ret = 0;
1783 	mtx_lock(&hs->q_mtx);
1784 	if (hs->q_len < MAX_QUEUED_HANDSHAKES) {
1785 		STAILQ_INSERT_TAIL(&hs->q_queue, pkt, p_parallel);
1786 		hs->q_len++;
1787 	} else {
1788 		ret = ENOBUFS;
1789 	}
1790 	mtx_unlock(&hs->q_mtx);
1791 	if (ret != 0)
1792 		wg_packet_free(pkt);
1793 	return (ret);
1794 }
1795 
1796 static struct wg_packet *
1797 wg_queue_dequeue_handshake(struct wg_queue *hs)
1798 {
1799 	struct wg_packet *pkt;
1800 	mtx_lock(&hs->q_mtx);
1801 	if ((pkt = STAILQ_FIRST(&hs->q_queue)) != NULL) {
1802 		STAILQ_REMOVE_HEAD(&hs->q_queue, p_parallel);
1803 		hs->q_len--;
1804 	}
1805 	mtx_unlock(&hs->q_mtx);
1806 	return (pkt);
1807 }
1808 
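/*
 * Append a packet to the peer's staging queue.  If the queue is already at
 * MAX_STAGED_PKT, the oldest staged packet is dropped to make room.
 */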
1809 static void
1810 wg_queue_push_staged(struct wg_queue *staged, struct wg_packet *pkt)
1811 {
1812 	struct wg_packet *old = NULL;
1813 
1814 	mtx_lock(&staged->q_mtx);
1815 	if (staged->q_len >= MAX_STAGED_PKT) {
1816 		old = STAILQ_FIRST(&staged->q_queue);
1817 		STAILQ_REMOVE_HEAD(&staged->q_queue, p_parallel);
1818 		staged->q_len--;
1819 	}
1820 	STAILQ_INSERT_TAIL(&staged->q_queue, pkt, p_parallel);
1821 	staged->q_len++;
1822 	mtx_unlock(&staged->q_mtx);
1823 
1824 	if (old != NULL)
1825 		wg_packet_free(old);
1826 }
1827 
1828 static void
1829 wg_queue_enlist_staged(struct wg_queue *staged, struct wg_packet_list *list)
1830 {
1831 	struct wg_packet *pkt, *tpkt;
1832 	STAILQ_FOREACH_SAFE(pkt, list, p_parallel, tpkt)
1833 		wg_queue_push_staged(staged, pkt);
1834 }
1835 
1836 static void
1837 wg_queue_delist_staged(struct wg_queue *staged, struct wg_packet_list *list)
1838 {
1839 	STAILQ_INIT(list);
1840 	mtx_lock(&staged->q_mtx);
1841 	STAILQ_CONCAT(list, &staged->q_queue);
1842 	staged->q_len = 0;
1843 	mtx_unlock(&staged->q_mtx);
1844 }
1845 
1846 static void
1847 wg_queue_purge(struct wg_queue *staged)
1848 {
1849 	struct wg_packet_list list;
1850 	struct wg_packet *pkt, *tpkt;
1851 	wg_queue_delist_staged(staged, &list);
1852 	STAILQ_FOREACH_SAFE(pkt, &list, p_parallel, tpkt)
1853 		wg_packet_free(pkt);
1854 }
1855 
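/*
 * Place a packet on both the per-peer serial queue (which preserves
 * delivery order) and the device-wide parallel queue (which feeds the
 * per-CPU crypto tasks).  If the parallel queue is full the packet is
 * marked WG_PACKET_DEAD and left on the serial queue, where the serial
 * consumer will discard it.
 */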
1856 static int
1857 wg_queue_both(struct wg_queue *parallel, struct wg_queue *serial, struct wg_packet *pkt)
1858 {
1859 	pkt->p_state = WG_PACKET_UNCRYPTED;
1860 
1861 	mtx_lock(&serial->q_mtx);
1862 	if (serial->q_len < MAX_QUEUED_PKT) {
1863 		serial->q_len++;
1864 		STAILQ_INSERT_TAIL(&serial->q_queue, pkt, p_serial);
1865 	} else {
1866 		mtx_unlock(&serial->q_mtx);
1867 		wg_packet_free(pkt);
1868 		return (ENOBUFS);
1869 	}
1870 	mtx_unlock(&serial->q_mtx);
1871 
1872 	mtx_lock(&parallel->q_mtx);
1873 	if (parallel->q_len < MAX_QUEUED_PKT) {
1874 		parallel->q_len++;
1875 		STAILQ_INSERT_TAIL(&parallel->q_queue, pkt, p_parallel);
1876 	} else {
1877 		mtx_unlock(&parallel->q_mtx);
1878 		pkt->p_state = WG_PACKET_DEAD;
1879 		return (ENOBUFS);
1880 	}
1881 	mtx_unlock(&parallel->q_mtx);
1882 
1883 	return (0);
1884 }
1885 
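/*
 * Dequeue from the serial queue only once the head packet has left the
 * UNCRYPTED state, i.e. a crypto worker has finished with it or it has been
 * marked dead.  This keeps delivery in the order the packets were queued.
 */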
1886 static struct wg_packet *
1887 wg_queue_dequeue_serial(struct wg_queue *serial)
1888 {
1889 	struct wg_packet *pkt = NULL;
1890 	mtx_lock(&serial->q_mtx);
1891 	if (serial->q_len > 0 && STAILQ_FIRST(&serial->q_queue)->p_state != WG_PACKET_UNCRYPTED) {
1892 		serial->q_len--;
1893 		pkt = STAILQ_FIRST(&serial->q_queue);
1894 		STAILQ_REMOVE_HEAD(&serial->q_queue, p_serial);
1895 	}
1896 	mtx_unlock(&serial->q_mtx);
1897 	return (pkt);
1898 }
1899 
1900 static struct wg_packet *
1901 wg_queue_dequeue_parallel(struct wg_queue *parallel)
1902 {
1903 	struct wg_packet *pkt = NULL;
1904 	mtx_lock(&parallel->q_mtx);
1905 	if (parallel->q_len > 0) {
1906 		parallel->q_len--;
1907 		pkt = STAILQ_FIRST(&parallel->q_queue);
1908 		STAILQ_REMOVE_HEAD(&parallel->q_queue, p_parallel);
1909 	}
1910 	mtx_unlock(&parallel->q_mtx);
1911 	return (pkt);
1912 }
1913 
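/*
 * UDP tunnel input callback.  Classify the datagram by its first 32-bit
 * word and total length: handshake messages (initiation, response, cookie)
 * are queued to the handshake taskqueue, data messages are matched to a
 * keypair by receiver index and queued for parallel decryption.  Anything
 * else is counted as an input error.
 */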
1914 static bool
1915 wg_input(struct mbuf *m, int offset, struct inpcb *inpcb,
1916     const struct sockaddr *sa, void *_sc)
1917 {
1918 #ifdef INET
1919 	const struct sockaddr_in	*sin;
1920 #endif
1921 #ifdef INET6
1922 	const struct sockaddr_in6	*sin6;
1923 #endif
1924 	struct noise_remote		*remote;
1925 	struct wg_pkt_data		*data;
1926 	struct wg_packet		*pkt;
1927 	struct wg_peer			*peer;
1928 	struct wg_softc			*sc = _sc;
1929 	struct mbuf			*defragged;
1930 
1931 	defragged = m_defrag(m, M_NOWAIT);
1932 	if (defragged)
1933 		m = defragged;
1934 	m = m_unshare(m, M_NOWAIT);
1935 	if (!m) {
1936 		if_inc_counter(sc->sc_ifp, IFCOUNTER_IQDROPS, 1);
1937 		return true;
1938 	}
1939 
1940 	/* Caller provided us with `sa`, no need for this header. */
1941 	m_adj(m, offset + sizeof(struct udphdr));
1942 
1943 	/* Pullup enough to read packet type */
1944 	if ((m = m_pullup(m, sizeof(uint32_t))) == NULL) {
1945 		if_inc_counter(sc->sc_ifp, IFCOUNTER_IQDROPS, 1);
1946 		return true;
1947 	}
1948 
1949 	if ((pkt = wg_packet_alloc(m)) == NULL) {
1950 		if_inc_counter(sc->sc_ifp, IFCOUNTER_IQDROPS, 1);
1951 		m_freem(m);
1952 		return true;
1953 	}
1954 
1955 	/* Save send/recv address and port for later. */
1956 	switch (sa->sa_family) {
1957 #ifdef INET
1958 	case AF_INET:
1959 		sin = (const struct sockaddr_in *)sa;
1960 		pkt->p_endpoint.e_remote.r_sin = sin[0];
1961 		pkt->p_endpoint.e_local.l_in = sin[1].sin_addr;
1962 		break;
1963 #endif
1964 #ifdef INET6
1965 	case AF_INET6:
1966 		sin6 = (const struct sockaddr_in6 *)sa;
1967 		pkt->p_endpoint.e_remote.r_sin6 = sin6[0];
1968 		pkt->p_endpoint.e_local.l_in6 = sin6[1].sin6_addr;
1969 		break;
1970 #endif
1971 	default:
1972 		goto error;
1973 	}
1974 
1975 	if ((m->m_pkthdr.len == sizeof(struct wg_pkt_initiation) &&
1976 		*mtod(m, uint32_t *) == WG_PKT_INITIATION) ||
1977 	    (m->m_pkthdr.len == sizeof(struct wg_pkt_response) &&
1978 		*mtod(m, uint32_t *) == WG_PKT_RESPONSE) ||
1979 	    (m->m_pkthdr.len == sizeof(struct wg_pkt_cookie) &&
1980 		*mtod(m, uint32_t *) == WG_PKT_COOKIE)) {
1981 
1982 		if (wg_queue_enqueue_handshake(&sc->sc_handshake_queue, pkt) != 0) {
1983 			if_inc_counter(sc->sc_ifp, IFCOUNTER_IQDROPS, 1);
1984 			DPRINTF(sc, "Dropping handshake packet\n");
1985 		}
1986 		GROUPTASK_ENQUEUE(&sc->sc_handshake);
1987 	} else if (m->m_pkthdr.len >= sizeof(struct wg_pkt_data) +
1988 	    NOISE_AUTHTAG_LEN && *mtod(m, uint32_t *) == WG_PKT_DATA) {
1989 
1990 		/* Pullup whole header to read r_idx below. */
1991 		if ((pkt->p_mbuf = m_pullup(m, sizeof(struct wg_pkt_data))) == NULL)
1992 			goto error;
1993 
1994 		data = mtod(pkt->p_mbuf, struct wg_pkt_data *);
1995 		if ((pkt->p_keypair = noise_keypair_lookup(sc->sc_local, data->r_idx)) == NULL)
1996 			goto error;
1997 
1998 		remote = noise_keypair_remote(pkt->p_keypair);
1999 		peer = noise_remote_arg(remote);
2000 		if (wg_queue_both(&sc->sc_decrypt_parallel, &peer->p_decrypt_serial, pkt) != 0)
2001 			if_inc_counter(sc->sc_ifp, IFCOUNTER_IQDROPS, 1);
2002 		wg_decrypt_dispatch(sc);
2003 		noise_remote_put(remote);
2004 	} else {
2005 		goto error;
2006 	}
2007 	return true;
2008 error:
2009 	if_inc_counter(sc->sc_ifp, IFCOUNTER_IERRORS, 1);
2010 	wg_packet_free(pkt);
2011 	return true;
2012 }
2013 
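/*
 * Flush the peer's staging queue: reserve a nonce from the current keypair
 * for every staged packet and hand the packets to the encryption queues.
 * If there is no current keypair, or its nonce space is exhausted, the
 * packets are re-staged and a new handshake is requested.
 */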
2014 static void
2015 wg_peer_send_staged(struct wg_peer *peer)
2016 {
2017 	struct wg_packet_list	 list;
2018 	struct noise_keypair	*keypair;
2019 	struct wg_packet	*pkt, *tpkt;
2020 	struct wg_softc		*sc = peer->p_sc;
2021 
2022 	wg_queue_delist_staged(&peer->p_stage_queue, &list);
2023 
2024 	if (STAILQ_EMPTY(&list))
2025 		return;
2026 
2027 	if ((keypair = noise_keypair_current(peer->p_remote)) == NULL)
2028 		goto error;
2029 
2030 	STAILQ_FOREACH(pkt, &list, p_parallel) {
2031 		if (noise_keypair_nonce_next(keypair, &pkt->p_nonce) != 0)
2032 			goto error_keypair;
2033 	}
2034 	STAILQ_FOREACH_SAFE(pkt, &list, p_parallel, tpkt) {
2035 		pkt->p_keypair = noise_keypair_ref(keypair);
2036 		if (wg_queue_both(&sc->sc_encrypt_parallel, &peer->p_encrypt_serial, pkt) != 0)
2037 			if_inc_counter(sc->sc_ifp, IFCOUNTER_OQDROPS, 1);
2038 	}
2039 	wg_encrypt_dispatch(sc);
2040 	noise_keypair_put(keypair);
2041 	return;
2042 
2043 error_keypair:
2044 	noise_keypair_put(keypair);
2045 error:
2046 	wg_queue_enlist_staged(&peer->p_stage_queue, &list);
2047 	wg_timers_event_want_initiation(peer);
2048 }
2049 
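/*
 * Account an output error and, for IPv4/IPv6, return an ICMP/ICMP6 host
 * unreachable to the sender (which consumes the mbuf) before freeing the
 * packet wrapper.
 */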
2050 static inline void
2051 xmit_err(struct ifnet *ifp, struct mbuf *m, struct wg_packet *pkt, sa_family_t af)
2052 {
2053 	if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
2054 	switch (af) {
2055 #ifdef INET
2056 	case AF_INET:
2057 		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0);
2058 		if (pkt)
2059 			pkt->p_mbuf = NULL;
2060 		m = NULL;
2061 		break;
2062 #endif
2063 #ifdef INET6
2064 	case AF_INET6:
2065 		icmp6_error(m, ICMP6_DST_UNREACH, 0, 0);
2066 		if (pkt)
2067 			pkt->p_mbuf = NULL;
2068 		m = NULL;
2069 		break;
2070 #endif
2071 	}
2072 	if (pkt)
2073 		wg_packet_free(pkt);
2074 	else if (m)
2075 		m_freem(m);
2076 }
2077 
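/*
 * Common transmit path: wrap the mbuf in a wg_packet, find the peer whose
 * allowed-IPs cover the destination address, and stage the packet for that
 * peer.  Packets with no matching peer, an unconfigured peer endpoint, or
 * excessive tunnel nesting are rejected via xmit_err().
 */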
2078 static int
2079 wg_xmit(struct ifnet *ifp, struct mbuf *m, sa_family_t af, uint32_t mtu)
2080 {
2081 	struct wg_packet	*pkt = NULL;
2082 	struct wg_softc		*sc = ifp->if_softc;
2083 	struct wg_peer		*peer;
2084 	int			 rc = 0;
2085 	sa_family_t		 peer_af;
2086 
2087 	/* Work around lifetime issue in the ipv6 mld code. */
2088 	if (__predict_false((ifp->if_flags & IFF_DYING) || !sc)) {
2089 		rc = ENXIO;
2090 		goto err_xmit;
2091 	}
2092 
2093 	if ((pkt = wg_packet_alloc(m)) == NULL) {
2094 		rc = ENOBUFS;
2095 		goto err_xmit;
2096 	}
2097 	pkt->p_mtu = mtu;
2098 	pkt->p_af = af;
2099 
2100 	if (af == AF_INET) {
2101 		peer = wg_aip_lookup(sc, AF_INET, &mtod(m, struct ip *)->ip_dst);
2102 	} else if (af == AF_INET6) {
2103 		peer = wg_aip_lookup(sc, AF_INET6, &mtod(m, struct ip6_hdr *)->ip6_dst);
2104 	} else {
2105 		rc = EAFNOSUPPORT;
2106 		goto err_xmit;
2107 	}
2108 
2109 	BPF_MTAP2_AF(ifp, m, pkt->p_af);
2110 
2111 	if (__predict_false(peer == NULL)) {
2112 		rc = ENOKEY;
2113 		goto err_xmit;
2114 	}
2115 
2116 	if (__predict_false(if_tunnel_check_nesting(ifp, m, MTAG_WGLOOP, MAX_LOOPS))) {
2117 		DPRINTF(sc, "Packet looped\n");
2118 		rc = ELOOP;
2119 		goto err_peer;
2120 	}
2121 
2122 	peer_af = peer->p_endpoint.e_remote.r_sa.sa_family;
2123 	if (__predict_false(peer_af != AF_INET && peer_af != AF_INET6)) {
2124 		DPRINTF(sc, "No valid endpoint has been configured or "
2125 			    "discovered for peer %" PRIu64 "\n", peer->p_id);
2126 		rc = EHOSTUNREACH;
2127 		goto err_peer;
2128 	}
2129 
2130 	wg_queue_push_staged(&peer->p_stage_queue, pkt);
2131 	wg_peer_send_staged(peer);
2132 	noise_remote_put(peer->p_remote);
2133 	return (0);
2134 
2135 err_peer:
2136 	noise_remote_put(peer->p_remote);
2137 err_xmit:
2138 	xmit_err(ifp, m, pkt, af);
2139 	return (rc);
2140 }
2141 
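/*
 * Pull up enough of the packet to read the IP version nibble and classify
 * the mbuf as IPv4 or IPv6; anything else is EAFNOSUPPORT.
 */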
2142 static inline int
2143 determine_af_and_pullup(struct mbuf **m, sa_family_t *af)
2144 {
2145 	u_char ipv;
2146 	if ((*m)->m_pkthdr.len >= sizeof(struct ip6_hdr))
2147 		*m = m_pullup(*m, sizeof(struct ip6_hdr));
2148 	else if ((*m)->m_pkthdr.len >= sizeof(struct ip))
2149 		*m = m_pullup(*m, sizeof(struct ip));
2150 	else
2151 		return (EAFNOSUPPORT);
2152 	if (*m == NULL)
2153 		return (ENOBUFS);
2154 	ipv = mtod(*m, struct ip *)->ip_v;
2155 	if (ipv == 4)
2156 		*af = AF_INET;
2157 	else if (ipv == 6 && (*m)->m_pkthdr.len >= sizeof(struct ip6_hdr))
2158 		*af = AF_INET6;
2159 	else
2160 		return (EAFNOSUPPORT);
2161 	return (0);
2162 }
2163 
2164 static int
2165 wg_transmit(struct ifnet *ifp, struct mbuf *m)
2166 {
2167 	sa_family_t af;
2168 	int ret;
2169 	struct mbuf *defragged;
2170 
2171 	defragged = m_defrag(m, M_NOWAIT);
2172 	if (defragged)
2173 		m = defragged;
2174 	m = m_unshare(m, M_NOWAIT);
2175 	if (!m) {
2176 		xmit_err(ifp, m, NULL, AF_UNSPEC);
2177 		return (ENOBUFS);
2178 	}
2179 
2180 	ret = determine_af_and_pullup(&m, &af);
2181 	if (ret) {
2182 		xmit_err(ifp, m, NULL, AF_UNSPEC);
2183 		return (ret);
2184 	}
2185 	return (wg_xmit(ifp, m, af, ifp->if_mtu));
2186 }
2187 
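/*
 * if_output entry point.  The address family may arrive wrapped in an
 * AF_UNSPEC sockaddr (sa_data carries the real AF); it is cross-checked
 * against the parsed packet header before handing off to wg_xmit().
 */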
2188 static int
2189 wg_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro)
2190 {
2191 	sa_family_t parsed_af;
2192 	uint32_t af, mtu;
2193 	int ret;
2194 	struct mbuf *defragged;
2195 
2196 	if (dst->sa_family == AF_UNSPEC)
2197 		memcpy(&af, dst->sa_data, sizeof(af));
2198 	else
2199 		af = dst->sa_family;
2200 	if (af == AF_UNSPEC) {
2201 		xmit_err(ifp, m, NULL, af);
2202 		return (EAFNOSUPPORT);
2203 	}
2204 
2205 	defragged = m_defrag(m, M_NOWAIT);
2206 	if (defragged)
2207 		m = defragged;
2208 	m = m_unshare(m, M_NOWAIT);
2209 	if (!m) {
2210 		xmit_err(ifp, m, NULL, AF_UNSPEC);
2211 		return (ENOBUFS);
2212 	}
2213 
2214 	ret = determine_af_and_pullup(&m, &parsed_af);
2215 	if (ret) {
2216 		xmit_err(ifp, m, NULL, AF_UNSPEC);
2217 		return (ret);
2218 	}
2219 	if (parsed_af != af) {
2220 		xmit_err(ifp, m, NULL, AF_UNSPEC);
2221 		return (EAFNOSUPPORT);
2222 	}
2223 	mtu = (ro != NULL && ro->ro_mtu > 0) ? ro->ro_mtu : ifp->if_mtu;
2224 	return (wg_xmit(ifp, m, parsed_af, mtu));
2225 }
2226 
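/*
 * Apply a single peer nvlist from a SIOCSWG request: create or look up the
 * peer by public key, then handle removal, endpoint, preshared key,
 * persistent keepalive and allowed-IP updates.  A newly created peer is
 * only inserted into the peer list once everything else has succeeded.
 */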
2227 static int
2228 wg_peer_add(struct wg_softc *sc, const nvlist_t *nvl)
2229 {
2230 	uint8_t			 public[WG_KEY_SIZE];
2231 	const void *pub_key, *preshared_key = NULL;
2232 	const struct sockaddr *endpoint;
2233 	int err;
2234 	size_t size;
2235 	struct noise_remote *remote;
2236 	struct wg_peer *peer = NULL;
2237 	bool need_insert = false;
2238 
2239 	sx_assert(&sc->sc_lock, SX_XLOCKED);
2240 
2241 	if (!nvlist_exists_binary(nvl, "public-key")) {
2242 		return (EINVAL);
2243 	}
2244 	pub_key = nvlist_get_binary(nvl, "public-key", &size);
2245 	if (size != WG_KEY_SIZE) {
2246 		return (EINVAL);
2247 	}
2248 	if (noise_local_keys(sc->sc_local, public, NULL) == 0 &&
2249 	    bcmp(public, pub_key, WG_KEY_SIZE) == 0) {
2250 		return (0); /* Silently ignored; not actually a failure. */
2251 	}
2252 	if ((remote = noise_remote_lookup(sc->sc_local, pub_key)) != NULL)
2253 		peer = noise_remote_arg(remote);
2254 	if (nvlist_exists_bool(nvl, "remove") &&
2255 		nvlist_get_bool(nvl, "remove")) {
2256 		if (remote != NULL) {
2257 			wg_peer_destroy(peer);
2258 			noise_remote_put(remote);
2259 		}
2260 		return (0);
2261 	}
2262 	if (nvlist_exists_bool(nvl, "replace-allowedips") &&
2263 		nvlist_get_bool(nvl, "replace-allowedips") &&
2264 	    peer != NULL) {
2265 
2266 		wg_aip_remove_all(sc, peer);
2267 	}
2268 	if (peer == NULL) {
2269 		peer = wg_peer_alloc(sc, pub_key);
2270 		need_insert = true;
2271 	}
2272 	if (nvlist_exists_binary(nvl, "endpoint")) {
2273 		endpoint = nvlist_get_binary(nvl, "endpoint", &size);
2274 		if (size > sizeof(peer->p_endpoint.e_remote)) {
2275 			err = EINVAL;
2276 			goto out;
2277 		}
2278 		memcpy(&peer->p_endpoint.e_remote, endpoint, size);
2279 	}
2280 	if (nvlist_exists_binary(nvl, "preshared-key")) {
2281 		preshared_key = nvlist_get_binary(nvl, "preshared-key", &size);
2282 		if (size != WG_KEY_SIZE) {
2283 			err = EINVAL;
2284 			goto out;
2285 		}
2286 		noise_remote_set_psk(peer->p_remote, preshared_key);
2287 	}
2288 	if (nvlist_exists_number(nvl, "persistent-keepalive-interval")) {
2289 		uint64_t pki = nvlist_get_number(nvl, "persistent-keepalive-interval");
2290 		if (pki > UINT16_MAX) {
2291 			err = EINVAL;
2292 			goto out;
2293 		}
2294 		wg_timers_set_persistent_keepalive(peer, pki);
2295 	}
2296 	if (nvlist_exists_nvlist_array(nvl, "allowed-ips")) {
2297 		const void *addr;
2298 		uint64_t cidr;
2299 		const nvlist_t * const * aipl;
2300 		size_t allowedip_count;
2301 
2302 		aipl = nvlist_get_nvlist_array(nvl, "allowed-ips", &allowedip_count);
2303 		for (size_t idx = 0; idx < allowedip_count; idx++) {
2304 			if (!nvlist_exists_number(aipl[idx], "cidr"))
2305 				continue;
2306 			cidr = nvlist_get_number(aipl[idx], "cidr");
2307 			if (nvlist_exists_binary(aipl[idx], "ipv4")) {
2308 				addr = nvlist_get_binary(aipl[idx], "ipv4", &size);
2309 				if (addr == NULL || cidr > 32 || size != sizeof(struct in_addr)) {
2310 					err = EINVAL;
2311 					goto out;
2312 				}
2313 				if ((err = wg_aip_add(sc, peer, AF_INET, addr, cidr)) != 0)
2314 					goto out;
2315 			} else if (nvlist_exists_binary(aipl[idx], "ipv6")) {
2316 				addr = nvlist_get_binary(aipl[idx], "ipv6", &size);
2317 				if (addr == NULL || cidr > 128 || size != sizeof(struct in6_addr)) {
2318 					err = EINVAL;
2319 					goto out;
2320 				}
2321 				if ((err = wg_aip_add(sc, peer, AF_INET6, addr, cidr)) != 0)
2322 					goto out;
2323 			} else {
2324 				continue;
2325 			}
2326 		}
2327 	}
2328 	if (need_insert) {
2329 		if ((err = noise_remote_enable(peer->p_remote)) != 0)
2330 			goto out;
2331 		TAILQ_INSERT_TAIL(&sc->sc_peers, peer, p_entry);
2332 		sc->sc_peers_num++;
2333 		if (sc->sc_ifp->if_link_state == LINK_STATE_UP)
2334 			wg_timers_enable(peer);
2335 	}
2336 	if (remote != NULL)
2337 		noise_remote_put(remote);
2338 	return (0);
2339 out:
2340 	if (need_insert) /* If we fail, only destroy if it was new. */
2341 		wg_peer_destroy(peer);
2342 	if (remote != NULL)
2343 		noise_remote_put(remote);
2344 	return (err);
2345 }
2346 
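/*
 * SIOCSWG handler: copy in and unpack the packed nvlist from userland, then
 * apply interface-wide settings (listen port, private key, user cookie) and
 * any per-peer sub-lists under the softc lock.
 */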
2347 static int
2348 wgc_set(struct wg_softc *sc, struct wg_data_io *wgd)
2349 {
2350 	uint8_t public[WG_KEY_SIZE], private[WG_KEY_SIZE];
2351 	struct ifnet *ifp;
2352 	void *nvlpacked;
2353 	nvlist_t *nvl;
2354 	ssize_t size;
2355 	int err;
2356 
2357 	ifp = sc->sc_ifp;
2358 	if (wgd->wgd_size == 0 || wgd->wgd_data == NULL)
2359 		return (EFAULT);
2360 
2361 	/* Can nvlists be streamed in?  Imposing an arbitrary limit like this is
2362 	 * not ideal, but there needs to be _some_ limit. */
2363 	if (wgd->wgd_size >= UINT32_MAX / 2)
2364 		return (E2BIG);
2365 
2366 	nvlpacked = malloc(wgd->wgd_size, M_TEMP, M_WAITOK | M_ZERO);
2367 
2368 	err = copyin(wgd->wgd_data, nvlpacked, wgd->wgd_size);
2369 	if (err)
2370 		goto out;
2371 	nvl = nvlist_unpack(nvlpacked, wgd->wgd_size, 0);
2372 	if (nvl == NULL) {
2373 		err = EBADMSG;
2374 		goto out;
2375 	}
2376 	sx_xlock(&sc->sc_lock);
2377 	if (nvlist_exists_bool(nvl, "replace-peers") &&
2378 		nvlist_get_bool(nvl, "replace-peers"))
2379 		wg_peer_destroy_all(sc);
2380 	if (nvlist_exists_number(nvl, "listen-port")) {
2381 		uint64_t new_port = nvlist_get_number(nvl, "listen-port");
2382 		if (new_port > UINT16_MAX) {
2383 			err = EINVAL;
2384 			goto out_locked;
2385 		}
2386 		if (new_port != sc->sc_socket.so_port) {
2387 			if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) {
2388 				if ((err = wg_socket_init(sc, new_port)) != 0)
2389 					goto out_locked;
2390 			} else
2391 				sc->sc_socket.so_port = new_port;
2392 		}
2393 	}
2394 	if (nvlist_exists_binary(nvl, "private-key")) {
2395 		const void *key = nvlist_get_binary(nvl, "private-key", &size);
2396 		if (size != WG_KEY_SIZE) {
2397 			err = EINVAL;
2398 			goto out_locked;
2399 		}
2400 
2401 		if (noise_local_keys(sc->sc_local, NULL, private) != 0 ||
2402 		    timingsafe_bcmp(private, key, WG_KEY_SIZE) != 0) {
2403 			struct wg_peer *peer;
2404 
2405 			if (curve25519_generate_public(public, key)) {
2406 				/* Peer conflict: remove conflicting peer. */
2407 				struct noise_remote *remote;
2408 				if ((remote = noise_remote_lookup(sc->sc_local,
2409 				    public)) != NULL) {
2410 					peer = noise_remote_arg(remote);
2411 					wg_peer_destroy(peer);
2412 					noise_remote_put(remote);
2413 				}
2414 			}
2415 
2416 			/*
2417 			 * Set the private key and invalidate all existing
2418 			 * handshakes.
2419 			 */
2420 			/* Note: we might be removing the private key. */
2421 			noise_local_private(sc->sc_local, key);
2422 			if (noise_local_keys(sc->sc_local, NULL, NULL) == 0)
2423 				cookie_checker_update(&sc->sc_cookie, public);
2424 			else
2425 				cookie_checker_update(&sc->sc_cookie, NULL);
2426 		}
2427 	}
2428 	if (nvlist_exists_number(nvl, "user-cookie")) {
2429 		uint64_t user_cookie = nvlist_get_number(nvl, "user-cookie");
2430 		if (user_cookie > UINT32_MAX) {
2431 			err = EINVAL;
2432 			goto out_locked;
2433 		}
2434 		err = wg_socket_set_cookie(sc, user_cookie);
2435 		if (err)
2436 			goto out_locked;
2437 	}
2438 	if (nvlist_exists_nvlist_array(nvl, "peers")) {
2439 		size_t peercount;
2440 		const nvlist_t * const*nvl_peers;
2441 
2442 		nvl_peers = nvlist_get_nvlist_array(nvl, "peers", &peercount);
2443 		for (size_t i = 0; i < peercount; i++) {
2444 			err = wg_peer_add(sc, nvl_peers[i]);
2445 			if (err != 0)
2446 				goto out_locked;
2447 		}
2448 	}
2449 
2450 out_locked:
2451 	sx_xunlock(&sc->sc_lock);
2452 	nvlist_destroy(nvl);
2453 out:
2454 	zfree(nvlpacked, M_TEMP);
2455 	return (err);
2456 }
2457 
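/*
 * SIOCGWG handler: build an nvlist describing the interface, its keys and
 * all peers/allowed-IPs, then pack it and copy it out.  A caller passing
 * wgd_size == 0 just gets the required buffer size back.  Private and
 * preshared keys are only included for privileged callers.
 */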
2458 static int
2459 wgc_get(struct wg_softc *sc, struct wg_data_io *wgd)
2460 {
2461 	uint8_t public_key[WG_KEY_SIZE] = { 0 };
2462 	uint8_t private_key[WG_KEY_SIZE] = { 0 };
2463 	uint8_t preshared_key[NOISE_SYMMETRIC_KEY_LEN] = { 0 };
2464 	nvlist_t *nvl, *nvl_peer, *nvl_aip, **nvl_peers, **nvl_aips;
2465 	size_t size, peer_count, aip_count, i, j;
2466 	struct wg_timespec64 ts64;
2467 	struct wg_peer *peer;
2468 	struct wg_aip *aip;
2469 	void *packed;
2470 	int err = 0;
2471 
2472 	nvl = nvlist_create(0);
2473 	if (!nvl)
2474 		return (ENOMEM);
2475 
2476 	sx_slock(&sc->sc_lock);
2477 
2478 	if (sc->sc_socket.so_port != 0)
2479 		nvlist_add_number(nvl, "listen-port", sc->sc_socket.so_port);
2480 	if (sc->sc_socket.so_user_cookie != 0)
2481 		nvlist_add_number(nvl, "user-cookie", sc->sc_socket.so_user_cookie);
2482 	if (noise_local_keys(sc->sc_local, public_key, private_key) == 0) {
2483 		nvlist_add_binary(nvl, "public-key", public_key, WG_KEY_SIZE);
2484 		if (wgc_privileged(sc))
2485 			nvlist_add_binary(nvl, "private-key", private_key, WG_KEY_SIZE);
2486 		explicit_bzero(private_key, sizeof(private_key));
2487 	}
2488 	peer_count = sc->sc_peers_num;
2489 	if (peer_count) {
2490 		nvl_peers = mallocarray(peer_count, sizeof(void *), M_NVLIST, M_WAITOK | M_ZERO);
2491 		i = 0;
2492 		TAILQ_FOREACH(peer, &sc->sc_peers, p_entry) {
2493 			if (i >= peer_count)
2494 				panic("peers changed from under us");
2495 
2496 			nvl_peers[i++] = nvl_peer = nvlist_create(0);
2497 			if (!nvl_peer) {
2498 				err = ENOMEM;
2499 				goto err_peer;
2500 			}
2501 
2502 			(void)noise_remote_keys(peer->p_remote, public_key, preshared_key);
2503 			nvlist_add_binary(nvl_peer, "public-key", public_key, sizeof(public_key));
2504 			if (wgc_privileged(sc))
2505 				nvlist_add_binary(nvl_peer, "preshared-key", preshared_key, sizeof(preshared_key));
2506 			explicit_bzero(preshared_key, sizeof(preshared_key));
2507 			if (peer->p_endpoint.e_remote.r_sa.sa_family == AF_INET)
2508 				nvlist_add_binary(nvl_peer, "endpoint", &peer->p_endpoint.e_remote, sizeof(struct sockaddr_in));
2509 			else if (peer->p_endpoint.e_remote.r_sa.sa_family == AF_INET6)
2510 				nvlist_add_binary(nvl_peer, "endpoint", &peer->p_endpoint.e_remote, sizeof(struct sockaddr_in6));
2511 			wg_timers_get_last_handshake(peer, &ts64);
2512 			nvlist_add_binary(nvl_peer, "last-handshake-time", &ts64, sizeof(ts64));
2513 			nvlist_add_number(nvl_peer, "persistent-keepalive-interval", peer->p_persistent_keepalive_interval);
2514 			nvlist_add_number(nvl_peer, "rx-bytes", counter_u64_fetch(peer->p_rx_bytes));
2515 			nvlist_add_number(nvl_peer, "tx-bytes", counter_u64_fetch(peer->p_tx_bytes));
2516 
2517 			aip_count = peer->p_aips_num;
2518 			if (aip_count) {
2519 				nvl_aips = mallocarray(aip_count, sizeof(void *), M_NVLIST, M_WAITOK | M_ZERO);
2520 				j = 0;
2521 				LIST_FOREACH(aip, &peer->p_aips, a_entry) {
2522 					if (j >= aip_count)
2523 						panic("aips changed from under us");
2524 
2525 					nvl_aips[j++] = nvl_aip = nvlist_create(0);
2526 					if (!nvl_aip) {
2527 						err = ENOMEM;
2528 						goto err_aip;
2529 					}
2530 					if (aip->a_af == AF_INET) {
2531 						nvlist_add_binary(nvl_aip, "ipv4", &aip->a_addr.in, sizeof(aip->a_addr.in));
2532 						nvlist_add_number(nvl_aip, "cidr", bitcount32(aip->a_mask.ip));
2533 					}
2534 #ifdef INET6
2535 					else if (aip->a_af == AF_INET6) {
2536 						nvlist_add_binary(nvl_aip, "ipv6", &aip->a_addr.in6, sizeof(aip->a_addr.in6));
2537 						nvlist_add_number(nvl_aip, "cidr", in6_mask2len(&aip->a_mask.in6, NULL));
2538 					}
2539 #endif
2540 				}
2541 				nvlist_add_nvlist_array(nvl_peer, "allowed-ips", (const nvlist_t *const *)nvl_aips, aip_count);
2542 			err_aip:
2543 				for (j = 0; j < aip_count; ++j)
2544 					nvlist_destroy(nvl_aips[j]);
2545 				free(nvl_aips, M_NVLIST);
2546 				if (err)
2547 					goto err_peer;
2548 			}
2549 		}
2550 		nvlist_add_nvlist_array(nvl, "peers", (const nvlist_t * const *)nvl_peers, peer_count);
2551 	err_peer:
2552 		for (i = 0; i < peer_count; ++i)
2553 			nvlist_destroy(nvl_peers[i]);
2554 		free(nvl_peers, M_NVLIST);
2555 		if (err) {
2556 			sx_sunlock(&sc->sc_lock);
2557 			goto err;
2558 		}
2559 	}
2560 	sx_sunlock(&sc->sc_lock);
2561 	packed = nvlist_pack(nvl, &size);
2562 	if (!packed) {
2563 		err = ENOMEM;
2564 		goto err;
2565 	}
2566 	if (!wgd->wgd_size) {
2567 		wgd->wgd_size = size;
2568 		goto out;
2569 	}
2570 	if (wgd->wgd_size < size) {
2571 		err = ENOSPC;
2572 		goto out;
2573 	}
2574 	err = copyout(packed, wgd->wgd_data, size);
2575 	wgd->wgd_size = size;
2576 
2577 out:
2578 	zfree(packed, M_NVLIST);
2579 err:
2580 	nvlist_destroy(nvl);
2581 	return (err);
2582 }
2583 
2584 static int
2585 wg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
2586 {
2587 	struct wg_data_io *wgd = (struct wg_data_io *)data;
2588 	struct ifreq *ifr = (struct ifreq *)data;
2589 	struct wg_softc *sc;
2590 	int ret = 0;
2591 
2592 	sx_slock(&wg_sx);
2593 	sc = ifp->if_softc;
2594 	if (!sc) {
2595 		ret = ENXIO;
2596 		goto out;
2597 	}
2598 
2599 	switch (cmd) {
2600 	case SIOCSWG:
2601 		ret = priv_check(curthread, PRIV_NET_WG);
2602 		if (ret == 0)
2603 			ret = wgc_set(sc, wgd);
2604 		break;
2605 	case SIOCGWG:
2606 		ret = wgc_get(sc, wgd);
2607 		break;
2608 	/* Interface IOCTLs */
2609 	case SIOCSIFADDR:
2610 		/*
2611 		 * This differs from *BSD norms, but is more uniform with how
2612 		 * WireGuard behaves elsewhere.
2613 		 */
2614 		break;
2615 	case SIOCSIFFLAGS:
2616 		if (ifp->if_flags & IFF_UP)
2617 			ret = wg_up(sc);
2618 		else
2619 			wg_down(sc);
2620 		break;
2621 	case SIOCSIFMTU:
2622 		if (ifr->ifr_mtu <= 0 || ifr->ifr_mtu > MAX_MTU)
2623 			ret = EINVAL;
2624 		else
2625 			ifp->if_mtu = ifr->ifr_mtu;
2626 		break;
2627 	case SIOCADDMULTI:
2628 	case SIOCDELMULTI:
2629 		break;
2630 	case SIOCGTUNFIB:
2631 		ifr->ifr_fib = sc->sc_socket.so_fibnum;
2632 		break;
2633 	case SIOCSTUNFIB:
2634 		ret = priv_check(curthread, PRIV_NET_WG);
2635 		if (ret)
2636 			break;
2637 		ret = priv_check(curthread, PRIV_NET_SETIFFIB);
2638 		if (ret)
2639 			break;
2640 		sx_xlock(&sc->sc_lock);
2641 		ret = wg_socket_set_fibnum(sc, ifr->ifr_fib);
2642 		sx_xunlock(&sc->sc_lock);
2643 		break;
2644 	default:
2645 		ret = ENOTTY;
2646 	}
2647 
2648 out:
2649 	sx_sunlock(&wg_sx);
2650 	return (ret);
2651 }
2652 
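/*
 * Bring the interface up: create the UDP sockets, enable per-peer timers
 * and report link up.  Fails with EBUSY while the interface is being torn
 * down (WGF_DYING).
 */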
2653 static int
2654 wg_up(struct wg_softc *sc)
2655 {
2656 	struct ifnet *ifp = sc->sc_ifp;
2657 	struct wg_peer *peer;
2658 	int rc = EBUSY;
2659 
2660 	sx_xlock(&sc->sc_lock);
2661 	/* Jail's being removed, no more wg_up(). */
2662 	if ((sc->sc_flags & WGF_DYING) != 0)
2663 		goto out;
2664 
2665 	/* Silent success if we're already running. */
2666 	rc = 0;
2667 	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2668 		goto out;
2669 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
2670 
2671 	rc = wg_socket_init(sc, sc->sc_socket.so_port);
2672 	if (rc == 0) {
2673 		TAILQ_FOREACH(peer, &sc->sc_peers, p_entry)
2674 			wg_timers_enable(peer);
2675 		if_link_state_change(sc->sc_ifp, LINK_STATE_UP);
2676 	} else {
2677 		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2678 		DPRINTF(sc, "Unable to initialize sockets: %d\n", rc);
2679 	}
2680 out:
2681 	sx_xunlock(&sc->sc_lock);
2682 	return (rc);
2683 }
2684 
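/*
 * Bring the interface down: purge staged and handshake packets, disable
 * timers, clear handshake and keypair state, report link down and close
 * the sockets.
 */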
2685 static void
2686 wg_down(struct wg_softc *sc)
2687 {
2688 	struct ifnet *ifp = sc->sc_ifp;
2689 	struct wg_peer *peer;
2690 
2691 	sx_xlock(&sc->sc_lock);
2692 	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
2693 		sx_xunlock(&sc->sc_lock);
2694 		return;
2695 	}
2696 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2697 
2698 	TAILQ_FOREACH(peer, &sc->sc_peers, p_entry) {
2699 		wg_queue_purge(&peer->p_stage_queue);
2700 		wg_timers_disable(peer);
2701 	}
2702 
2703 	wg_queue_purge(&sc->sc_handshake_queue);
2704 
2705 	TAILQ_FOREACH(peer, &sc->sc_peers, p_entry) {
2706 		noise_remote_handshake_clear(peer->p_remote);
2707 		noise_remote_keypairs_clear(peer->p_remote);
2708 	}
2709 
2710 	if_link_state_change(sc->sc_ifp, LINK_STATE_DOWN);
2711 	wg_socket_uninit(sc);
2712 
2713 	sx_xunlock(&sc->sc_lock);
2714 }
2715 
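/*
 * Cloner create routine: allocate the softc, noise local state, per-CPU
 * encrypt/decrypt grouptasks, allowed-IP radix heads and packet queues,
 * then attach the ifnet.
 */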
2716 static int
2717 wg_clone_create(struct if_clone *ifc, int unit, caddr_t params)
2718 {
2719 	struct wg_softc *sc;
2720 	struct ifnet *ifp;
2721 
2722 	sc = malloc(sizeof(*sc), M_WG, M_WAITOK | M_ZERO);
2723 
2724 	sc->sc_local = noise_local_alloc(sc);
2725 
2726 	sc->sc_encrypt = mallocarray(sizeof(struct grouptask), mp_ncpus, M_WG, M_WAITOK | M_ZERO);
2727 
2728 	sc->sc_decrypt = mallocarray(sizeof(struct grouptask), mp_ncpus, M_WG, M_WAITOK | M_ZERO);
2729 
2730 	if (!rn_inithead((void **)&sc->sc_aip4, offsetof(struct aip_addr, in) * NBBY))
2731 		goto free_decrypt;
2732 
2733 	if (!rn_inithead((void **)&sc->sc_aip6, offsetof(struct aip_addr, in6) * NBBY))
2734 		goto free_aip4;
2735 
2736 	atomic_add_int(&clone_count, 1);
2737 	ifp = sc->sc_ifp = if_alloc(IFT_WIREGUARD);
2738 
2739 	sc->sc_ucred = crhold(curthread->td_ucred);
2740 	sc->sc_socket.so_fibnum = curthread->td_proc->p_fibnum;
2741 	sc->sc_socket.so_port = 0;
2742 
2743 	TAILQ_INIT(&sc->sc_peers);
2744 	sc->sc_peers_num = 0;
2745 
2746 	cookie_checker_init(&sc->sc_cookie);
2747 
2748 	RADIX_NODE_HEAD_LOCK_INIT(sc->sc_aip4);
2749 	RADIX_NODE_HEAD_LOCK_INIT(sc->sc_aip6);
2750 
2751 	GROUPTASK_INIT(&sc->sc_handshake, 0, (gtask_fn_t *)wg_softc_handshake_receive, sc);
2752 	taskqgroup_attach(qgroup_wg_tqg, &sc->sc_handshake, sc, NULL, NULL, "wg tx initiation");
2753 	wg_queue_init(&sc->sc_handshake_queue, "hsq");
2754 
2755 	for (int i = 0; i < mp_ncpus; i++) {
2756 		GROUPTASK_INIT(&sc->sc_encrypt[i], 0,
2757 		     (gtask_fn_t *)wg_softc_encrypt, sc);
2758 		taskqgroup_attach_cpu(qgroup_wg_tqg, &sc->sc_encrypt[i], sc, i, NULL, NULL, "wg encrypt");
2759 		GROUPTASK_INIT(&sc->sc_decrypt[i], 0,
2760 		    (gtask_fn_t *)wg_softc_decrypt, sc);
2761 		taskqgroup_attach_cpu(qgroup_wg_tqg, &sc->sc_decrypt[i], sc, i, NULL, NULL, "wg decrypt");
2762 	}
2763 
2764 	wg_queue_init(&sc->sc_encrypt_parallel, "encp");
2765 	wg_queue_init(&sc->sc_decrypt_parallel, "decp");
2766 
2767 	sx_init(&sc->sc_lock, "wg softc lock");
2768 
2769 	ifp->if_softc = sc;
2770 	ifp->if_capabilities = ifp->if_capenable = WG_CAPS;
2771 	if_initname(ifp, wgname, unit);
2772 
2773 	if_setmtu(ifp, DEFAULT_MTU);
2774 	ifp->if_flags = IFF_NOARP | IFF_MULTICAST;
2775 	ifp->if_init = wg_init;
2776 	ifp->if_reassign = wg_reassign;
2777 	ifp->if_qflush = wg_qflush;
2778 	ifp->if_transmit = wg_transmit;
2779 	ifp->if_output = wg_output;
2780 	ifp->if_ioctl = wg_ioctl;
2781 	if_attach(ifp);
2782 	bpfattach(ifp, DLT_NULL, sizeof(uint32_t));
2783 #ifdef INET6
2784 	ND_IFINFO(ifp)->flags &= ~ND6_IFF_AUTO_LINKLOCAL;
2785 	ND_IFINFO(ifp)->flags |= ND6_IFF_NO_DAD;
2786 #endif
2787 	sx_xlock(&wg_sx);
2788 	LIST_INSERT_HEAD(&wg_list, sc, sc_entry);
2789 	sx_xunlock(&wg_sx);
2790 	return (0);
2791 free_aip4:
2792 	RADIX_NODE_HEAD_DESTROY(sc->sc_aip4);
2793 	free(sc->sc_aip4, M_RTABLE);
2794 free_decrypt:
2795 	free(sc->sc_decrypt, M_WG);
2796 	free(sc->sc_encrypt, M_WG);
2797 	noise_local_free(sc->sc_local, NULL);
2798 	free(sc, M_WG);
2799 	return (ENOMEM);
2800 }
2801 
2802 static void
2803 wg_clone_deferred_free(struct noise_local *l)
2804 {
2805 	struct wg_softc *sc = noise_local_arg(l);
2806 
2807 	free(sc, M_WG);
2808 	atomic_add_int(&clone_count, -1);
2809 }
2810 
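/*
 * Cloner destroy routine: mark the softc dying, close the sockets, wait for
 * in-flight traffic (epoch) and taskqueue work to drain, destroy all peers
 * and free every resource allocated in wg_clone_create().
 */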
2811 static void
2812 wg_clone_destroy(struct ifnet *ifp)
2813 {
2814 	struct wg_softc *sc = ifp->if_softc;
2815 	struct ucred *cred;
2816 
2817 	sx_xlock(&wg_sx);
2818 	ifp->if_softc = NULL;
2819 	sx_xlock(&sc->sc_lock);
2820 	sc->sc_flags |= WGF_DYING;
2821 	cred = sc->sc_ucred;
2822 	sc->sc_ucred = NULL;
2823 	sx_xunlock(&sc->sc_lock);
2824 	LIST_REMOVE(sc, sc_entry);
2825 	sx_xunlock(&wg_sx);
2826 
2827 	if_link_state_change(sc->sc_ifp, LINK_STATE_DOWN);
2828 	CURVNET_SET(sc->sc_ifp->if_vnet);
2829 	if_purgeaddrs(sc->sc_ifp);
2830 	CURVNET_RESTORE();
2831 
2832 	sx_xlock(&sc->sc_lock);
2833 	wg_socket_uninit(sc);
2834 	sx_xunlock(&sc->sc_lock);
2835 
2836 	/*
2837 	 * No guarantee that all traffic has passed until the epoch has
2838 	 * elapsed with the socket closed.
2839 	 */
2840 	NET_EPOCH_WAIT();
2841 
2842 	taskqgroup_drain_all(qgroup_wg_tqg);
2843 	sx_xlock(&sc->sc_lock);
2844 	wg_peer_destroy_all(sc);
2845 	NET_EPOCH_DRAIN_CALLBACKS();
2846 	sx_xunlock(&sc->sc_lock);
2847 	sx_destroy(&sc->sc_lock);
2848 	taskqgroup_detach(qgroup_wg_tqg, &sc->sc_handshake);
2849 	for (int i = 0; i < mp_ncpus; i++) {
2850 		taskqgroup_detach(qgroup_wg_tqg, &sc->sc_encrypt[i]);
2851 		taskqgroup_detach(qgroup_wg_tqg, &sc->sc_decrypt[i]);
2852 	}
2853 	free(sc->sc_encrypt, M_WG);
2854 	free(sc->sc_decrypt, M_WG);
2855 	wg_queue_deinit(&sc->sc_handshake_queue);
2856 	wg_queue_deinit(&sc->sc_encrypt_parallel);
2857 	wg_queue_deinit(&sc->sc_decrypt_parallel);
2858 
2859 	RADIX_NODE_HEAD_DESTROY(sc->sc_aip4);
2860 	RADIX_NODE_HEAD_DESTROY(sc->sc_aip6);
2861 	rn_detachhead((void **)&sc->sc_aip4);
2862 	rn_detachhead((void **)&sc->sc_aip6);
2863 
2864 	cookie_checker_free(&sc->sc_cookie);
2865 
2866 	if (cred != NULL)
2867 		crfree(cred);
2868 	if_detach(sc->sc_ifp);
2869 	if_free(sc->sc_ifp);
2870 
2871 	noise_local_free(sc->sc_local, wg_clone_deferred_free);
2872 }
2873 
2874 static void
2875 wg_qflush(struct ifnet *ifp __unused)
2876 {
2877 }
2878 
2879 /*
2880  * Privileged information (private-key, preshared-key) is only exported for
2881  * root and jailed root by default.
2882  */
2883 static bool
2884 wgc_privileged(struct wg_softc *sc)
2885 {
2886 	struct thread *td;
2887 
2888 	td = curthread;
2889 	return (priv_check(td, PRIV_NET_WG) == 0);
2890 }
2891 
2892 static void
2893 wg_reassign(struct ifnet *ifp, struct vnet *new_vnet __unused,
2894     char *unused __unused)
2895 {
2896 	struct wg_softc *sc;
2897 
2898 	sc = ifp->if_softc;
2899 	wg_down(sc);
2900 }
2901 
2902 static void
2903 wg_init(void *xsc)
2904 {
2905 	struct wg_softc *sc;
2906 
2907 	sc = xsc;
2908 	wg_up(sc);
2909 }
2910 
2911 static void
2912 vnet_wg_init(const void *unused __unused)
2913 {
2914 	V_wg_cloner = if_clone_simple(wgname, wg_clone_create, wg_clone_destroy,
2915 				      0);
2916 }
2917 VNET_SYSINIT(vnet_wg_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
2918 	     vnet_wg_init, NULL);
2919 
2920 static void
2921 vnet_wg_uninit(const void *unused __unused)
2922 {
2923 	if (V_wg_cloner)
2924 		if_clone_detach(V_wg_cloner);
2925 }
2926 VNET_SYSUNINIT(vnet_wg_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
2927 	       vnet_wg_uninit, NULL);
2928 
2929 static int
2930 wg_prison_remove(void *obj, void *data __unused)
2931 {
2932 	const struct prison *pr = obj;
2933 	struct wg_softc *sc;
2934 
2935 	/*
2936 	 * Do a pass through all if_wg interfaces and release creds on any from
2937 	 * the jail that are supposed to be going away.  This will, in turn, let
2938 	 * the jail die so that we don't end up with Schrödinger's jail.
2939 	 */
2940 	sx_slock(&wg_sx);
2941 	LIST_FOREACH(sc, &wg_list, sc_entry) {
2942 		sx_xlock(&sc->sc_lock);
2943 		if (!(sc->sc_flags & WGF_DYING) && sc->sc_ucred && sc->sc_ucred->cr_prison == pr) {
2944 			struct ucred *cred = sc->sc_ucred;
2945 			DPRINTF(sc, "Creating jail is exiting\n");
2946 			if_link_state_change(sc->sc_ifp, LINK_STATE_DOWN);
2947 			wg_socket_uninit(sc);
2948 			sc->sc_ucred = NULL;
2949 			crfree(cred);
2950 			sc->sc_flags |= WGF_DYING;
2951 		}
2952 		sx_xunlock(&sc->sc_lock);
2953 	}
2954 	sx_sunlock(&wg_sx);
2955 
2956 	return (0);
2957 }
2958 
2959 #ifdef SELFTESTS
2960 #include "selftest/allowedips.c"
2961 static bool wg_run_selftests(void)
2962 {
2963 	bool ret = true;
2964 	ret &= wg_allowedips_selftest();
2965 	ret &= noise_counter_selftest();
2966 	ret &= cookie_selftest();
2967 	return ret;
2968 }
2969 #else
2970 static inline bool wg_run_selftests(void) { return true; }
2971 #endif
2972 
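/*
 * Module load: create the packet zone, initialize the crypto and cookie
 * subsystems, register the jail OSD hook and run the selftests.
 */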
2973 static int
2974 wg_module_init(void)
2975 {
2976 	int ret = ENOMEM;
2977 
2978 	osd_method_t methods[PR_MAXMETHOD] = {
2979 		[PR_METHOD_REMOVE] = wg_prison_remove,
2980 	};
2981 
2982 	if ((wg_packet_zone = uma_zcreate("wg packet", sizeof(struct wg_packet),
2983 	     NULL, NULL, NULL, NULL, 0, 0)) == NULL)
2984 		goto free_none;
2985 	ret = crypto_init();
2986 	if (ret != 0)
2987 		goto free_zone;
2988 	if (cookie_init() != 0)
2989 		goto free_crypto;
2990 
2991 	wg_osd_jail_slot = osd_jail_register(NULL, methods);
2992 
2993 	ret = ENOTRECOVERABLE;
2994 	if (!wg_run_selftests())
2995 		goto free_all;
2996 
2997 	return (0);
2998 
2999 free_all:
3000 	osd_jail_deregister(wg_osd_jail_slot);
3001 	cookie_deinit();
3002 free_crypto:
3003 	crypto_deinit();
3004 free_zone:
3005 	uma_zdestroy(wg_packet_zone);
3006 free_none:
3007 	return (ret);
3008 }
3009 
3010 static void
3011 wg_module_deinit(void)
3012 {
3013 	VNET_ITERATOR_DECL(vnet_iter);
3014 	VNET_LIST_RLOCK();
3015 	VNET_FOREACH(vnet_iter) {
3016 		struct if_clone *clone = VNET_VNET(vnet_iter, wg_cloner);
3017 		if (clone) {
3018 			if_clone_detach(clone);
3019 			VNET_VNET(vnet_iter, wg_cloner) = NULL;
3020 		}
3021 	}
3022 	VNET_LIST_RUNLOCK();
3023 	NET_EPOCH_WAIT();
3024 	MPASS(LIST_EMPTY(&wg_list));
3025 	osd_jail_deregister(wg_osd_jail_slot);
3026 	cookie_deinit();
3027 	crypto_deinit();
3028 	uma_zdestroy(wg_packet_zone);
3029 }
3030 
3031 static int
3032 wg_module_event_handler(module_t mod, int what, void *arg)
3033 {
3034 	switch (what) {
3035 		case MOD_LOAD:
3036 			return wg_module_init();
3037 		case MOD_UNLOAD:
3038 			wg_module_deinit();
3039 			break;
3040 		default:
3041 			return (EOPNOTSUPP);
3042 	}
3043 	return (0);
3044 }
3045 
3046 static moduledata_t wg_moduledata = {
3047 	wgname,
3048 	wg_module_event_handler,
3049 	NULL
3050 };
3051 
3052 DECLARE_MODULE(wg, wg_moduledata, SI_SUB_PSEUDO, SI_ORDER_ANY);
3053 MODULE_VERSION(wg, WIREGUARD_VERSION);
3054 MODULE_DEPEND(wg, crypto, 1, 1, 1);
3055