xref: /freebsd/sys/dev/wg/if_wg.c (revision f126890ac5386406dadf7c4cfa9566cbb56537c5)
1 /* SPDX-License-Identifier: ISC
2  *
3  * Copyright (C) 2015-2021 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
4  * Copyright (C) 2019-2021 Matt Dunwoodie <ncon@noconroy.net>
5  * Copyright (c) 2019-2020 Rubicon Communications, LLC (Netgate)
6  * Copyright (c) 2021 Kyle Evans <kevans@FreeBSD.org>
7  * Copyright (c) 2022 The FreeBSD Foundation
8  */
9 
10 #include "opt_inet.h"
11 #include "opt_inet6.h"
12 
13 #include <sys/param.h>
14 #include <sys/systm.h>
15 #include <sys/counter.h>
16 #include <sys/gtaskqueue.h>
17 #include <sys/jail.h>
18 #include <sys/kernel.h>
19 #include <sys/lock.h>
20 #include <sys/mbuf.h>
21 #include <sys/module.h>
22 #include <sys/nv.h>
23 #include <sys/priv.h>
24 #include <sys/protosw.h>
25 #include <sys/rmlock.h>
26 #include <sys/rwlock.h>
27 #include <sys/smp.h>
28 #include <sys/socket.h>
29 #include <sys/socketvar.h>
30 #include <sys/sockio.h>
31 #include <sys/sysctl.h>
32 #include <sys/sx.h>
33 #include <machine/_inttypes.h>
34 #include <net/bpf.h>
35 #include <net/ethernet.h>
36 #include <net/if.h>
37 #include <net/if_clone.h>
38 #include <net/if_types.h>
39 #include <net/if_var.h>
40 #include <net/netisr.h>
41 #include <net/radix.h>
42 #include <netinet/in.h>
43 #include <netinet6/in6_var.h>
44 #include <netinet/ip.h>
45 #include <netinet/ip6.h>
46 #include <netinet/ip_icmp.h>
47 #include <netinet/icmp6.h>
48 #include <netinet/udp_var.h>
49 #include <netinet6/nd6.h>
50 
51 #include "wg_noise.h"
52 #include "wg_cookie.h"
53 #include "version.h"
54 #include "if_wg.h"
55 
56 #define DEFAULT_MTU		(ETHERMTU - 80)
57 #define MAX_MTU			(IF_MAXMTU - 80)
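/*
 * The 80 bytes reserved above cover the worst-case WireGuard encapsulation
 * overhead for a data packet carried over IPv6: 40 (IPv6 header) + 8 (UDP) +
 * 4 (message type) + 4 (receiver index) + 8 (counter) + 16 (Poly1305
 * authentication tag) = 80 bytes.
 */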
58 
59 #define MAX_STAGED_PKT		128
60 #define MAX_QUEUED_PKT		1024
61 #define MAX_QUEUED_PKT_MASK	(MAX_QUEUED_PKT - 1)
62 
63 #define MAX_QUEUED_HANDSHAKES	4096
64 
65 #define REKEY_TIMEOUT_JITTER	334 /* 1/3 sec, round for arc4random_uniform */
66 #define MAX_TIMER_HANDSHAKES	(90 / REKEY_TIMEOUT)
67 #define NEW_HANDSHAKE_TIMEOUT	(REKEY_TIMEOUT + KEEPALIVE_TIMEOUT)
68 #define UNDERLOAD_TIMEOUT	1
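/*
 * REKEY_TIMEOUT, KEEPALIVE_TIMEOUT and REJECT_AFTER_TIME are second-valued
 * protocol constants pulled in via the WireGuard headers above; the timer
 * code below converts them with MSEC_2_TICKS(x * 1000).  REKEY_TIMEOUT_JITTER
 * is already in milliseconds and is passed straight to arc4random_uniform().
 */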
69 
70 #define DPRINTF(sc, ...) if (if_getflags(sc->sc_ifp) & IFF_DEBUG) if_printf(sc->sc_ifp, ##__VA_ARGS__)
71 
72 /* First byte indicating packet type on the wire */
73 #define WG_PKT_INITIATION htole32(1)
74 #define WG_PKT_RESPONSE htole32(2)
75 #define WG_PKT_COOKIE htole32(3)
76 #define WG_PKT_DATA htole32(4)
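/*
 * On the wire the message type is a single byte followed by three reserved
 * zero bytes; treating it as a 32-bit little-endian word (hence htole32)
 * lets the handshake code compare the whole leading word at once.
 */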
77 
78 #define WG_PKT_PADDING		16
79 #define WG_KEY_SIZE		32
80 
81 struct wg_pkt_initiation {
82 	uint32_t		t;
83 	uint32_t		s_idx;
84 	uint8_t			ue[NOISE_PUBLIC_KEY_LEN];
85 	uint8_t			es[NOISE_PUBLIC_KEY_LEN + NOISE_AUTHTAG_LEN];
86 	uint8_t			ets[NOISE_TIMESTAMP_LEN + NOISE_AUTHTAG_LEN];
87 	struct cookie_macs	m;
88 };
89 
90 struct wg_pkt_response {
91 	uint32_t		t;
92 	uint32_t		s_idx;
93 	uint32_t		r_idx;
94 	uint8_t			ue[NOISE_PUBLIC_KEY_LEN];
95 	uint8_t			en[0 + NOISE_AUTHTAG_LEN];
96 	struct cookie_macs	m;
97 };
98 
99 struct wg_pkt_cookie {
100 	uint32_t		t;
101 	uint32_t		r_idx;
102 	uint8_t			nonce[COOKIE_NONCE_SIZE];
103 	uint8_t			ec[COOKIE_ENCRYPTED_SIZE];
104 };
105 
106 struct wg_pkt_data {
107 	uint32_t		t;
108 	uint32_t		r_idx;
109 	uint64_t		nonce;
110 	uint8_t			buf[];
111 };
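/*
 * A data message is a fixed 16-byte header (type, receiver index, 64-bit
 * little-endian counter) followed by the ChaCha20-Poly1305 ciphertext, which
 * ends in a 16-byte authentication tag; an empty plaintext is a keepalive.
 */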
112 
113 struct wg_endpoint {
114 	union {
115 		struct sockaddr		r_sa;
116 		struct sockaddr_in	r_sin;
117 #ifdef INET6
118 		struct sockaddr_in6	r_sin6;
119 #endif
120 	} e_remote;
121 	union {
122 		struct in_addr		l_in;
123 #ifdef INET6
124 		struct in6_pktinfo	l_pktinfo6;
125 #define l_in6 l_pktinfo6.ipi6_addr
126 #endif
127 	} e_local;
128 };
129 
130 struct aip_addr {
131 	uint8_t		length;
132 	union {
133 		uint8_t		bytes[16];
134 		uint32_t	ip;
135 		uint32_t	ip6[4];
136 		struct in_addr	in;
137 		struct in6_addr	in6;
138 	};
139 };
140 
141 struct wg_aip {
142 	struct radix_node	 a_nodes[2];
143 	LIST_ENTRY(wg_aip)	 a_entry;
144 	struct aip_addr		 a_addr;
145 	struct aip_addr		 a_mask;
146 	struct wg_peer		*a_peer;
147 	sa_family_t		 a_af;
148 };
149 
150 struct wg_packet {
151 	STAILQ_ENTRY(wg_packet)	 p_serial;
152 	STAILQ_ENTRY(wg_packet)	 p_parallel;
153 	struct wg_endpoint	 p_endpoint;
154 	struct noise_keypair	*p_keypair;
155 	uint64_t		 p_nonce;
156 	struct mbuf		*p_mbuf;
157 	int			 p_mtu;
158 	sa_family_t		 p_af;
159 	enum wg_ring_state {
160 		WG_PACKET_UNCRYPTED,
161 		WG_PACKET_CRYPTED,
162 		WG_PACKET_DEAD,
163 	}			 p_state;
164 };
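/*
 * Each packet is linked onto two queues at once: a shared "parallel" queue
 * that the per-CPU crypto tasks drain in any order, and the owning peer's
 * "serial" queue that preserves submission order for delivery.  p_state
 * tells the serial consumer whether the crypto step has finished (see
 * wg_queue_both() and wg_deliver_{in,out}()).
 */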
165 
166 STAILQ_HEAD(wg_packet_list, wg_packet);
167 
168 struct wg_queue {
169 	struct mtx		 q_mtx;
170 	struct wg_packet_list	 q_queue;
171 	size_t			 q_len;
172 };
173 
174 struct wg_peer {
175 	TAILQ_ENTRY(wg_peer)		 p_entry;
176 	uint64_t			 p_id;
177 	struct wg_softc			*p_sc;
178 
179 	struct noise_remote		*p_remote;
180 	struct cookie_maker		 p_cookie;
181 
182 	struct rwlock			 p_endpoint_lock;
183 	struct wg_endpoint		 p_endpoint;
184 
185 	struct wg_queue	 		 p_stage_queue;
186 	struct wg_queue	 		 p_encrypt_serial;
187 	struct wg_queue	 		 p_decrypt_serial;
188 
189 	bool				 p_enabled;
190 	bool				 p_need_another_keepalive;
191 	uint16_t			 p_persistent_keepalive_interval;
192 	struct callout			 p_new_handshake;
193 	struct callout			 p_send_keepalive;
194 	struct callout			 p_retry_handshake;
195 	struct callout			 p_zero_key_material;
196 	struct callout			 p_persistent_keepalive;
197 
198 	struct mtx			 p_handshake_mtx;
199 	struct timespec			 p_handshake_complete;	/* nanotime */
200 	int				 p_handshake_retries;
201 
202 	struct grouptask		 p_send;
203 	struct grouptask		 p_recv;
204 
205 	counter_u64_t			 p_tx_bytes;
206 	counter_u64_t			 p_rx_bytes;
207 
208 	LIST_HEAD(, wg_aip)		 p_aips;
209 	size_t				 p_aips_num;
210 };
211 
212 struct wg_socket {
213 	struct socket	*so_so4;
214 	struct socket	*so_so6;
215 	uint32_t	 so_user_cookie;
216 	int		 so_fibnum;
217 	in_port_t	 so_port;
218 };
219 
220 struct wg_softc {
221 	LIST_ENTRY(wg_softc)	 sc_entry;
222 	if_t			 sc_ifp;
223 	int			 sc_flags;
224 
225 	struct ucred		*sc_ucred;
226 	struct wg_socket	 sc_socket;
227 
228 	TAILQ_HEAD(,wg_peer)	 sc_peers;
229 	size_t			 sc_peers_num;
230 
231 	struct noise_local	*sc_local;
232 	struct cookie_checker	 sc_cookie;
233 
234 	struct radix_node_head	*sc_aip4;
235 	struct radix_node_head	*sc_aip6;
236 
237 	struct grouptask	 sc_handshake;
238 	struct wg_queue		 sc_handshake_queue;
239 
240 	struct grouptask	*sc_encrypt;
241 	struct grouptask	*sc_decrypt;
242 	struct wg_queue		 sc_encrypt_parallel;
243 	struct wg_queue		 sc_decrypt_parallel;
244 	u_int			 sc_encrypt_last_cpu;
245 	u_int			 sc_decrypt_last_cpu;
246 
247 	struct sx		 sc_lock;
248 };
249 
250 #define	WGF_DYING	0x0001
251 
252 #define MAX_LOOPS	8
253 #define MTAG_WGLOOP	0x77676c70 /* wglp */
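/*
 * MTAG_WGLOOP tags mbufs that have already passed through a wg interface so
 * that nested wg-over-wg transmission can be detected; MAX_LOOPS bounds how
 * many such nestings are tolerated before the packet is dropped (the check
 * lives in the transmit path further below).
 */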
254 #ifndef ENOKEY
255 #define	ENOKEY	ENOTCAPABLE
256 #endif
257 
258 #define	GROUPTASK_DRAIN(gtask)			\
259 	gtaskqueue_drain((gtask)->gt_taskqueue, &(gtask)->gt_task)
260 
261 #define BPF_MTAP2_AF(ifp, m, af) do { \
262 		uint32_t __bpf_tap_af = (af); \
263 		BPF_MTAP2(ifp, &__bpf_tap_af, sizeof(__bpf_tap_af), m); \
264 	} while (0)
265 
266 static int clone_count;
267 static uma_zone_t wg_packet_zone;
268 static volatile unsigned long peer_counter = 0;
269 static const char wgname[] = "wg";
270 static unsigned wg_osd_jail_slot;
271 
272 static struct sx wg_sx;
273 SX_SYSINIT(wg_sx, &wg_sx, "wg_sx");
274 
275 static LIST_HEAD(, wg_softc) wg_list = LIST_HEAD_INITIALIZER(wg_list);
276 
277 static TASKQGROUP_DEFINE(wg_tqg, mp_ncpus, 1);
278 
279 MALLOC_DEFINE(M_WG, "WG", "wireguard");
280 
281 VNET_DEFINE_STATIC(struct if_clone *, wg_cloner);
282 
283 #define	V_wg_cloner	VNET(wg_cloner)
284 #define	WG_CAPS		IFCAP_LINKSTATE
285 
286 struct wg_timespec64 {
287 	uint64_t	tv_sec;
288 	uint64_t	tv_nsec;
289 };
290 
291 static int wg_socket_init(struct wg_softc *, in_port_t);
292 static int wg_socket_bind(struct socket **, struct socket **, in_port_t *);
293 static void wg_socket_set(struct wg_softc *, struct socket *, struct socket *);
294 static void wg_socket_uninit(struct wg_softc *);
295 static int wg_socket_set_sockopt(struct socket *, struct socket *, int, void *, size_t);
296 static int wg_socket_set_cookie(struct wg_softc *, uint32_t);
297 static int wg_socket_set_fibnum(struct wg_softc *, int);
298 static int wg_send(struct wg_softc *, struct wg_endpoint *, struct mbuf *);
299 static void wg_timers_enable(struct wg_peer *);
300 static void wg_timers_disable(struct wg_peer *);
301 static void wg_timers_set_persistent_keepalive(struct wg_peer *, uint16_t);
302 static void wg_timers_get_last_handshake(struct wg_peer *, struct wg_timespec64 *);
303 static void wg_timers_event_data_sent(struct wg_peer *);
304 static void wg_timers_event_data_received(struct wg_peer *);
305 static void wg_timers_event_any_authenticated_packet_sent(struct wg_peer *);
306 static void wg_timers_event_any_authenticated_packet_received(struct wg_peer *);
307 static void wg_timers_event_any_authenticated_packet_traversal(struct wg_peer *);
308 static void wg_timers_event_handshake_initiated(struct wg_peer *);
309 static void wg_timers_event_handshake_complete(struct wg_peer *);
310 static void wg_timers_event_session_derived(struct wg_peer *);
311 static void wg_timers_event_want_initiation(struct wg_peer *);
312 static void wg_timers_run_send_initiation(struct wg_peer *, bool);
313 static void wg_timers_run_retry_handshake(void *);
314 static void wg_timers_run_send_keepalive(void *);
315 static void wg_timers_run_new_handshake(void *);
316 static void wg_timers_run_zero_key_material(void *);
317 static void wg_timers_run_persistent_keepalive(void *);
318 static int wg_aip_add(struct wg_softc *, struct wg_peer *, sa_family_t, const void *, uint8_t);
319 static struct wg_peer *wg_aip_lookup(struct wg_softc *, sa_family_t, void *);
320 static void wg_aip_remove_all(struct wg_softc *, struct wg_peer *);
321 static struct wg_peer *wg_peer_alloc(struct wg_softc *, const uint8_t [WG_KEY_SIZE]);
322 static void wg_peer_free_deferred(struct noise_remote *);
323 static void wg_peer_destroy(struct wg_peer *);
324 static void wg_peer_destroy_all(struct wg_softc *);
325 static void wg_peer_send_buf(struct wg_peer *, uint8_t *, size_t);
326 static void wg_send_initiation(struct wg_peer *);
327 static void wg_send_response(struct wg_peer *);
328 static void wg_send_cookie(struct wg_softc *, struct cookie_macs *, uint32_t, struct wg_endpoint *);
329 static void wg_peer_set_endpoint(struct wg_peer *, struct wg_endpoint *);
330 static void wg_peer_clear_src(struct wg_peer *);
331 static void wg_peer_get_endpoint(struct wg_peer *, struct wg_endpoint *);
332 static void wg_send_buf(struct wg_softc *, struct wg_endpoint *, uint8_t *, size_t);
333 static void wg_send_keepalive(struct wg_peer *);
334 static void wg_handshake(struct wg_softc *, struct wg_packet *);
335 static void wg_encrypt(struct wg_softc *, struct wg_packet *);
336 static void wg_decrypt(struct wg_softc *, struct wg_packet *);
337 static void wg_softc_handshake_receive(struct wg_softc *);
338 static void wg_softc_decrypt(struct wg_softc *);
339 static void wg_softc_encrypt(struct wg_softc *);
340 static void wg_encrypt_dispatch(struct wg_softc *);
341 static void wg_decrypt_dispatch(struct wg_softc *);
342 static void wg_deliver_out(struct wg_peer *);
343 static void wg_deliver_in(struct wg_peer *);
344 static struct wg_packet *wg_packet_alloc(struct mbuf *);
345 static void wg_packet_free(struct wg_packet *);
346 static void wg_queue_init(struct wg_queue *, const char *);
347 static void wg_queue_deinit(struct wg_queue *);
348 static size_t wg_queue_len(struct wg_queue *);
349 static int wg_queue_enqueue_handshake(struct wg_queue *, struct wg_packet *);
350 static struct wg_packet *wg_queue_dequeue_handshake(struct wg_queue *);
351 static void wg_queue_push_staged(struct wg_queue *, struct wg_packet *);
352 static void wg_queue_enlist_staged(struct wg_queue *, struct wg_packet_list *);
353 static void wg_queue_delist_staged(struct wg_queue *, struct wg_packet_list *);
354 static void wg_queue_purge(struct wg_queue *);
355 static int wg_queue_both(struct wg_queue *, struct wg_queue *, struct wg_packet *);
356 static struct wg_packet *wg_queue_dequeue_serial(struct wg_queue *);
357 static struct wg_packet *wg_queue_dequeue_parallel(struct wg_queue *);
358 static bool wg_input(struct mbuf *, int, struct inpcb *, const struct sockaddr *, void *);
359 static void wg_peer_send_staged(struct wg_peer *);
360 static int wg_clone_create(struct if_clone *ifc, char *name, size_t len,
361 	struct ifc_data *ifd, if_t *ifpp);
362 static void wg_qflush(if_t);
363 static inline int determine_af_and_pullup(struct mbuf **m, sa_family_t *af);
364 static int wg_xmit(if_t, struct mbuf *, sa_family_t, uint32_t);
365 static int wg_transmit(if_t, struct mbuf *);
366 static int wg_output(if_t, struct mbuf *, const struct sockaddr *, struct route *);
367 static int wg_clone_destroy(struct if_clone *ifc, if_t ifp,
368 	uint32_t flags);
369 static bool wgc_privileged(struct wg_softc *);
370 static int wgc_get(struct wg_softc *, struct wg_data_io *);
371 static int wgc_set(struct wg_softc *, struct wg_data_io *);
372 static int wg_up(struct wg_softc *);
373 static void wg_down(struct wg_softc *);
374 static void wg_reassign(if_t, struct vnet *, char *unused);
375 static void wg_init(void *);
376 static int wg_ioctl(if_t, u_long, caddr_t);
377 static void vnet_wg_init(const void *);
378 static void vnet_wg_uninit(const void *);
379 static int wg_module_init(void);
380 static void wg_module_deinit(void);
381 
382 /* TODO Peer */
383 static struct wg_peer *
384 wg_peer_alloc(struct wg_softc *sc, const uint8_t pub_key[WG_KEY_SIZE])
385 {
386 	struct wg_peer *peer;
387 
388 	sx_assert(&sc->sc_lock, SX_XLOCKED);
389 
390 	peer = malloc(sizeof(*peer), M_WG, M_WAITOK | M_ZERO);
391 	peer->p_remote = noise_remote_alloc(sc->sc_local, peer, pub_key);
392 	peer->p_tx_bytes = counter_u64_alloc(M_WAITOK);
393 	peer->p_rx_bytes = counter_u64_alloc(M_WAITOK);
394 	peer->p_id = peer_counter++;
395 	peer->p_sc = sc;
396 
397 	cookie_maker_init(&peer->p_cookie, pub_key);
398 
399 	rw_init(&peer->p_endpoint_lock, "wg_peer_endpoint");
400 
401 	wg_queue_init(&peer->p_stage_queue, "stageq");
402 	wg_queue_init(&peer->p_encrypt_serial, "txq");
403 	wg_queue_init(&peer->p_decrypt_serial, "rxq");
404 
405 	peer->p_enabled = false;
406 	peer->p_need_another_keepalive = false;
407 	peer->p_persistent_keepalive_interval = 0;
408 	callout_init(&peer->p_new_handshake, true);
409 	callout_init(&peer->p_send_keepalive, true);
410 	callout_init(&peer->p_retry_handshake, true);
411 	callout_init(&peer->p_persistent_keepalive, true);
412 	callout_init(&peer->p_zero_key_material, true);
413 
414 	mtx_init(&peer->p_handshake_mtx, "peer handshake", NULL, MTX_DEF);
415 	bzero(&peer->p_handshake_complete, sizeof(peer->p_handshake_complete));
416 	peer->p_handshake_retries = 0;
417 
418 	GROUPTASK_INIT(&peer->p_send, 0, (gtask_fn_t *)wg_deliver_out, peer);
419 	taskqgroup_attach(qgroup_wg_tqg, &peer->p_send, peer, NULL, NULL, "wg send");
420 	GROUPTASK_INIT(&peer->p_recv, 0, (gtask_fn_t *)wg_deliver_in, peer);
421 	taskqgroup_attach(qgroup_wg_tqg, &peer->p_recv, peer, NULL, NULL, "wg recv");
422 
423 	LIST_INIT(&peer->p_aips);
424 	peer->p_aips_num = 0;
425 
426 	return (peer);
427 }
428 
429 static void
430 wg_peer_free_deferred(struct noise_remote *r)
431 {
432 	struct wg_peer *peer = noise_remote_arg(r);
433 
434 	/* While there are no references remaining, we may still have
435 	 * p_{send,recv} executing (think empty queue, but wg_deliver_{in,out}
436 	 * needs to check the queue). We should wait for them and then free. */
437 	GROUPTASK_DRAIN(&peer->p_recv);
438 	GROUPTASK_DRAIN(&peer->p_send);
439 	taskqgroup_detach(qgroup_wg_tqg, &peer->p_recv);
440 	taskqgroup_detach(qgroup_wg_tqg, &peer->p_send);
441 
442 	wg_queue_deinit(&peer->p_decrypt_serial);
443 	wg_queue_deinit(&peer->p_encrypt_serial);
444 	wg_queue_deinit(&peer->p_stage_queue);
445 
446 	counter_u64_free(peer->p_tx_bytes);
447 	counter_u64_free(peer->p_rx_bytes);
448 	rw_destroy(&peer->p_endpoint_lock);
449 	mtx_destroy(&peer->p_handshake_mtx);
450 
451 	cookie_maker_free(&peer->p_cookie);
452 
453 	free(peer, M_WG);
454 }
455 
456 static void
457 wg_peer_destroy(struct wg_peer *peer)
458 {
459 	struct wg_softc *sc = peer->p_sc;
460 	sx_assert(&sc->sc_lock, SX_XLOCKED);
461 
462 	/* Disable remote and timers. This will prevent any new handshakes
463 	 * occurring. */
464 	noise_remote_disable(peer->p_remote);
465 	wg_timers_disable(peer);
466 
467 	/* Now we can remove all allowed IPs so no more packets will be routed
468 	 * to the peer. */
469 	wg_aip_remove_all(sc, peer);
470 
471 	/* Remove peer from the interface, then free. Some references may still
472 	 * exist to p_remote, so noise_remote_free will wait until they're all
473 	 * put before calling wg_peer_free_deferred. */
474 	sc->sc_peers_num--;
475 	TAILQ_REMOVE(&sc->sc_peers, peer, p_entry);
476 	DPRINTF(sc, "Peer %" PRIu64 " destroyed\n", peer->p_id);
477 	noise_remote_free(peer->p_remote, wg_peer_free_deferred);
478 }
479 
480 static void
481 wg_peer_destroy_all(struct wg_softc *sc)
482 {
483 	struct wg_peer *peer, *tpeer;
484 	TAILQ_FOREACH_SAFE(peer, &sc->sc_peers, p_entry, tpeer)
485 		wg_peer_destroy(peer);
486 }
487 
488 static void
489 wg_peer_set_endpoint(struct wg_peer *peer, struct wg_endpoint *e)
490 {
491 	MPASS(e->e_remote.r_sa.sa_family != 0);
492 	if (memcmp(e, &peer->p_endpoint, sizeof(*e)) == 0)
493 		return;
494 
495 	rw_wlock(&peer->p_endpoint_lock);
496 	peer->p_endpoint = *e;
497 	rw_wunlock(&peer->p_endpoint_lock);
498 }
499 
500 static void
501 wg_peer_clear_src(struct wg_peer *peer)
502 {
503 	rw_wlock(&peer->p_endpoint_lock);
504 	bzero(&peer->p_endpoint.e_local, sizeof(peer->p_endpoint.e_local));
505 	rw_wunlock(&peer->p_endpoint_lock);
506 }
507 
508 static void
509 wg_peer_get_endpoint(struct wg_peer *peer, struct wg_endpoint *e)
510 {
511 	rw_rlock(&peer->p_endpoint_lock);
512 	*e = peer->p_endpoint;
513 	rw_runlock(&peer->p_endpoint_lock);
514 }
515 
516 /* Allowed IP */
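/*
 * Allowed IPs are stored in per-AF radix tries keyed by the prefix length and
 * the address masked down to that prefix.  Inserting a prefix that already
 * exists simply re-points the existing node at the new peer, matching
 * WireGuard's rule that an allowed IP belongs to at most one peer.
 */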
517 static int
518 wg_aip_add(struct wg_softc *sc, struct wg_peer *peer, sa_family_t af, const void *addr, uint8_t cidr)
519 {
520 	struct radix_node_head	*root;
521 	struct radix_node	*node;
522 	struct wg_aip		*aip;
523 	int			 ret = 0;
524 
525 	aip = malloc(sizeof(*aip), M_WG, M_WAITOK | M_ZERO);
526 	aip->a_peer = peer;
527 	aip->a_af = af;
528 
529 	switch (af) {
530 #ifdef INET
531 	case AF_INET:
532 		if (cidr > 32) cidr = 32;
533 		root = sc->sc_aip4;
534 		aip->a_addr.in = *(const struct in_addr *)addr;
535 		aip->a_mask.ip = htonl(~((1LL << (32 - cidr)) - 1) & 0xffffffff);
536 		aip->a_addr.ip &= aip->a_mask.ip;
537 		aip->a_addr.length = aip->a_mask.length = offsetof(struct aip_addr, in) + sizeof(struct in_addr);
538 		break;
539 #endif
540 #ifdef INET6
541 	case AF_INET6:
542 		if (cidr > 128) cidr = 128;
543 		root = sc->sc_aip6;
544 		aip->a_addr.in6 = *(const struct in6_addr *)addr;
545 		in6_prefixlen2mask(&aip->a_mask.in6, cidr);
546 		for (int i = 0; i < 4; i++)
547 			aip->a_addr.ip6[i] &= aip->a_mask.ip6[i];
548 		aip->a_addr.length = aip->a_mask.length = offsetof(struct aip_addr, in6) + sizeof(struct in6_addr);
549 		break;
550 #endif
551 	default:
552 		free(aip, M_WG);
553 		return (EAFNOSUPPORT);
554 	}
555 
556 	RADIX_NODE_HEAD_LOCK(root);
557 	node = root->rnh_addaddr(&aip->a_addr, &aip->a_mask, &root->rh, aip->a_nodes);
558 	if (node == aip->a_nodes) {
559 		LIST_INSERT_HEAD(&peer->p_aips, aip, a_entry);
560 		peer->p_aips_num++;
561 	} else if (!node)
562 		node = root->rnh_lookup(&aip->a_addr, &aip->a_mask, &root->rh);
563 	if (!node) {
564 		free(aip, M_WG);
565 		ret = ENOMEM;
566 	} else if (node != aip->a_nodes) {
567 		free(aip, M_WG);
568 		aip = (struct wg_aip *)node;
569 		if (aip->a_peer != peer) {
570 			LIST_REMOVE(aip, a_entry);
571 			aip->a_peer->p_aips_num--;
572 			aip->a_peer = peer;
573 			LIST_INSERT_HEAD(&peer->p_aips, aip, a_entry);
574 			aip->a_peer->p_aips_num++;
575 		}
576 	}
577 	RADIX_NODE_HEAD_UNLOCK(root);
578 	return (ret);
579 }
580 
581 static struct wg_peer *
582 wg_aip_lookup(struct wg_softc *sc, sa_family_t af, void *a)
583 {
584 	struct radix_node_head	*root;
585 	struct radix_node	*node;
586 	struct wg_peer		*peer;
587 	struct aip_addr		 addr;
588 	RADIX_NODE_HEAD_RLOCK_TRACKER;
589 
590 	switch (af) {
591 	case AF_INET:
592 		root = sc->sc_aip4;
593 		memcpy(&addr.in, a, sizeof(addr.in));
594 		addr.length = offsetof(struct aip_addr, in) + sizeof(struct in_addr);
595 		break;
596 	case AF_INET6:
597 		root = sc->sc_aip6;
598 		memcpy(&addr.in6, a, sizeof(addr.in6));
599 		addr.length = offsetof(struct aip_addr, in6) + sizeof(struct in6_addr);
600 		break;
601 	default:
602 		return NULL;
603 	}
604 
605 	RADIX_NODE_HEAD_RLOCK(root);
606 	node = root->rnh_matchaddr(&addr, &root->rh);
607 	if (node != NULL) {
608 		peer = ((struct wg_aip *)node)->a_peer;
609 		noise_remote_ref(peer->p_remote);
610 	} else {
611 		peer = NULL;
612 	}
613 	RADIX_NODE_HEAD_RUNLOCK(root);
614 
615 	return (peer);
616 }
617 
618 static void
619 wg_aip_remove_all(struct wg_softc *sc, struct wg_peer *peer)
620 {
621 	struct wg_aip		*aip, *taip;
622 
623 	RADIX_NODE_HEAD_LOCK(sc->sc_aip4);
624 	LIST_FOREACH_SAFE(aip, &peer->p_aips, a_entry, taip) {
625 		if (aip->a_af == AF_INET) {
626 			if (sc->sc_aip4->rnh_deladdr(&aip->a_addr, &aip->a_mask, &sc->sc_aip4->rh) == NULL)
627 				panic("failed to delete aip %p", aip);
628 			LIST_REMOVE(aip, a_entry);
629 			peer->p_aips_num--;
630 			free(aip, M_WG);
631 		}
632 	}
633 	RADIX_NODE_HEAD_UNLOCK(sc->sc_aip4);
634 
635 	RADIX_NODE_HEAD_LOCK(sc->sc_aip6);
636 	LIST_FOREACH_SAFE(aip, &peer->p_aips, a_entry, taip) {
637 		if (aip->a_af == AF_INET6) {
638 			if (sc->sc_aip6->rnh_deladdr(&aip->a_addr, &aip->a_mask, &sc->sc_aip6->rh) == NULL)
639 				panic("failed to delete aip %p", aip);
640 			LIST_REMOVE(aip, a_entry);
641 			peer->p_aips_num--;
642 			free(aip, M_WG);
643 		}
644 	}
645 	RADIX_NODE_HEAD_UNLOCK(sc->sc_aip6);
646 
647 	if (!LIST_EMPTY(&peer->p_aips) || peer->p_aips_num != 0)
648 		panic("wg_aip_remove_all could not delete all %p", peer);
649 }
650 
651 static int
652 wg_socket_init(struct wg_softc *sc, in_port_t port)
653 {
654 	struct ucred *cred = sc->sc_ucred;
655 	struct socket *so4 = NULL, *so6 = NULL;
656 	int rc;
657 
658 	sx_assert(&sc->sc_lock, SX_XLOCKED);
659 
660 	if (!cred)
661 		return (EBUSY);
662 
663 	/*
664 	 * For socket creation, we use the creds of the thread that created the
665 	 * tunnel rather than the current thread to maintain the semantics that
666 	 * WireGuard has on Linux with network namespaces -- that the sockets
667 	 * are created in their home vnet so that they can be configured and
668 	 * functionally attached to a foreign vnet as the jail's only interface
669 	 * to the network.
670 	 */
671 #ifdef INET
672 	rc = socreate(AF_INET, &so4, SOCK_DGRAM, IPPROTO_UDP, cred, curthread);
673 	if (rc)
674 		goto out;
675 
676 	rc = udp_set_kernel_tunneling(so4, wg_input, NULL, sc);
677 	/*
678 	 * udp_set_kernel_tunneling can only fail if there is already a tunneling function set.
679 	 * This should never happen with a new socket.
680 	 */
681 	MPASS(rc == 0);
682 #endif
683 
684 #ifdef INET6
685 	rc = socreate(AF_INET6, &so6, SOCK_DGRAM, IPPROTO_UDP, cred, curthread);
686 	if (rc)
687 		goto out;
688 	rc = udp_set_kernel_tunneling(so6, wg_input, NULL, sc);
689 	MPASS(rc == 0);
690 #endif
691 
692 	if (sc->sc_socket.so_user_cookie) {
693 		rc = wg_socket_set_sockopt(so4, so6, SO_USER_COOKIE, &sc->sc_socket.so_user_cookie, sizeof(sc->sc_socket.so_user_cookie));
694 		if (rc)
695 			goto out;
696 	}
697 	rc = wg_socket_set_sockopt(so4, so6, SO_SETFIB, &sc->sc_socket.so_fibnum, sizeof(sc->sc_socket.so_fibnum));
698 	if (rc)
699 		goto out;
700 
701 	rc = wg_socket_bind(&so4, &so6, &port);
702 	if (!rc) {
703 		sc->sc_socket.so_port = port;
704 		wg_socket_set(sc, so4, so6);
705 	}
706 out:
707 	if (rc) {
708 		if (so4 != NULL)
709 			soclose(so4);
710 		if (so6 != NULL)
711 			soclose(so6);
712 	}
713 	return (rc);
714 }
715 
716 static int wg_socket_set_sockopt(struct socket *so4, struct socket *so6, int name, void *val, size_t len)
717 {
718 	int ret4 = 0, ret6 = 0;
719 	struct sockopt sopt = {
720 		.sopt_dir = SOPT_SET,
721 		.sopt_level = SOL_SOCKET,
722 		.sopt_name = name,
723 		.sopt_val = val,
724 		.sopt_valsize = len
725 	};
726 
727 	if (so4)
728 		ret4 = sosetopt(so4, &sopt);
729 	if (so6)
730 		ret6 = sosetopt(so6, &sopt);
731 	return (ret4 ?: ret6);
732 }
733 
734 static int wg_socket_set_cookie(struct wg_softc *sc, uint32_t user_cookie)
735 {
736 	struct wg_socket *so = &sc->sc_socket;
737 	int ret;
738 
739 	sx_assert(&sc->sc_lock, SX_XLOCKED);
740 	ret = wg_socket_set_sockopt(so->so_so4, so->so_so6, SO_USER_COOKIE, &user_cookie, sizeof(user_cookie));
741 	if (!ret)
742 		so->so_user_cookie = user_cookie;
743 	return (ret);
744 }
745 
746 static int wg_socket_set_fibnum(struct wg_softc *sc, int fibnum)
747 {
748 	struct wg_socket *so = &sc->sc_socket;
749 	int ret;
750 
751 	sx_assert(&sc->sc_lock, SX_XLOCKED);
752 
753 	ret = wg_socket_set_sockopt(so->so_so4, so->so_so6, SO_SETFIB, &fibnum, sizeof(fibnum));
754 	if (!ret)
755 		so->so_fibnum = fibnum;
756 	return (ret);
757 }
758 
759 static void
760 wg_socket_uninit(struct wg_softc *sc)
761 {
762 	wg_socket_set(sc, NULL, NULL);
763 }
764 
765 static void
766 wg_socket_set(struct wg_softc *sc, struct socket *new_so4, struct socket *new_so6)
767 {
768 	struct wg_socket *so = &sc->sc_socket;
769 	struct socket *so4, *so6;
770 
771 	sx_assert(&sc->sc_lock, SX_XLOCKED);
772 
773 	so4 = atomic_load_ptr(&so->so_so4);
774 	so6 = atomic_load_ptr(&so->so_so6);
775 	atomic_store_ptr(&so->so_so4, new_so4);
776 	atomic_store_ptr(&so->so_so6, new_so6);
777 
778 	if (!so4 && !so6)
779 		return;
780 	NET_EPOCH_WAIT();
781 	if (so4)
782 		soclose(so4);
783 	if (so6)
784 		soclose(so6);
785 }
786 
787 static int
788 wg_socket_bind(struct socket **in_so4, struct socket **in_so6, in_port_t *requested_port)
789 {
790 	struct socket *so4 = *in_so4, *so6 = *in_so6;
791 	int ret4 = 0, ret6 = 0;
792 	in_port_t port = *requested_port;
793 	struct sockaddr_in sin = {
794 		.sin_len = sizeof(struct sockaddr_in),
795 		.sin_family = AF_INET,
796 		.sin_port = htons(port)
797 	};
798 	struct sockaddr_in6 sin6 = {
799 		.sin6_len = sizeof(struct sockaddr_in6),
800 		.sin6_family = AF_INET6,
801 		.sin6_port = htons(port)
802 	};
803 
804 	if (so4) {
805 		ret4 = sobind(so4, (struct sockaddr *)&sin, curthread);
806 		if (ret4 && ret4 != EADDRNOTAVAIL)
807 			return (ret4);
808 		if (!ret4 && !sin.sin_port) {
809 			struct sockaddr_in bound_sin =
810 			    { .sin_len = sizeof(bound_sin) };
811 			int ret;
812 
813 			ret = sosockaddr(so4, (struct sockaddr *)&bound_sin);
814 			if (ret)
815 				return (ret);
816 			port = ntohs(bound_sin.sin_port);
817 			sin6.sin6_port = bound_sin.sin_port;
818 		}
819 	}
820 
821 	if (so6) {
822 		ret6 = sobind(so6, (struct sockaddr *)&sin6, curthread);
823 		if (ret6 && ret6 != EADDRNOTAVAIL)
824 			return (ret6);
825 		if (!ret6 && !sin6.sin6_port) {
826 			struct sockaddr_in6 bound_sin6 =
827 			    { .sin6_len = sizeof(bound_sin6) };
828 			int ret;
829 
830 			ret = sosockaddr(so6, (struct sockaddr *)&bound_sin6);
831 			if (ret)
832 				return (ret);
833 			port = ntohs(bound_sin6.sin6_port);
834 		}
835 	}
836 
837 	if (ret4 && ret6)
838 		return (ret4);
839 	*requested_port = port;
840 	if (ret4 && !ret6 && so4) {
841 		soclose(so4);
842 		*in_so4 = NULL;
843 	} else if (ret6 && !ret4 && so6) {
844 		soclose(so6);
845 		*in_so6 = NULL;
846 	}
847 	return (0);
848 }
849 
850 static int
851 wg_send(struct wg_softc *sc, struct wg_endpoint *e, struct mbuf *m)
852 {
853 	struct epoch_tracker et;
854 	struct sockaddr *sa;
855 	struct wg_socket *so = &sc->sc_socket;
856 	struct socket *so4, *so6;
857 	struct mbuf *control = NULL;
858 	int ret = 0;
859 	size_t len = m->m_pkthdr.len;
860 
861 	/* Get local control address before locking */
862 	if (e->e_remote.r_sa.sa_family == AF_INET) {
863 		if (e->e_local.l_in.s_addr != INADDR_ANY)
864 			control = sbcreatecontrol((caddr_t)&e->e_local.l_in,
865 			    sizeof(struct in_addr), IP_SENDSRCADDR,
866 			    IPPROTO_IP, M_NOWAIT);
867 #ifdef INET6
868 	} else if (e->e_remote.r_sa.sa_family == AF_INET6) {
869 		if (!IN6_IS_ADDR_UNSPECIFIED(&e->e_local.l_in6))
870 			control = sbcreatecontrol((caddr_t)&e->e_local.l_pktinfo6,
871 			    sizeof(struct in6_pktinfo), IPV6_PKTINFO,
872 			    IPPROTO_IPV6, M_NOWAIT);
873 #endif
874 	} else {
875 		m_freem(m);
876 		return (EAFNOSUPPORT);
877 	}
878 
879 	/* Get remote address */
880 	sa = &e->e_remote.r_sa;
881 
882 	NET_EPOCH_ENTER(et);
883 	so4 = atomic_load_ptr(&so->so_so4);
884 	so6 = atomic_load_ptr(&so->so_so6);
885 	if (e->e_remote.r_sa.sa_family == AF_INET && so4 != NULL)
886 		ret = sosend(so4, sa, NULL, m, control, 0, curthread);
887 	else if (e->e_remote.r_sa.sa_family == AF_INET6 && so6 != NULL)
888 		ret = sosend(so6, sa, NULL, m, control, 0, curthread);
889 	else {
890 		ret = ENOTCONN;
891 		m_freem(control);
892 		m_freem(m);
893 	}
894 	NET_EPOCH_EXIT(et);
895 	if (ret == 0) {
896 		if_inc_counter(sc->sc_ifp, IFCOUNTER_OPACKETS, 1);
897 		if_inc_counter(sc->sc_ifp, IFCOUNTER_OBYTES, len);
898 	}
899 	return (ret);
900 }
901 
902 static void
903 wg_send_buf(struct wg_softc *sc, struct wg_endpoint *e, uint8_t *buf, size_t len)
904 {
905 	struct mbuf	*m;
906 	int		 ret = 0;
907 	bool		 retried = false;
908 
909 retry:
910 	m = m_get2(len, M_NOWAIT, MT_DATA, M_PKTHDR);
911 	if (!m) {
912 		ret = ENOMEM;
913 		goto out;
914 	}
915 	m_copyback(m, 0, len, buf);
916 
917 	if (ret == 0) {
918 		ret = wg_send(sc, e, m);
919 		/* Retry if we couldn't bind to e->e_local */
920 		if (ret == EADDRNOTAVAIL && !retried) {
921 			bzero(&e->e_local, sizeof(e->e_local));
922 			retried = true;
923 			goto retry;
924 		}
925 	} else {
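		/*
		 * Only reached on the retry pass: ret still holds
		 * EADDRNOTAVAIL from the first attempt, so just send again
		 * with the cleared e_local and report whatever comes back.
		 */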
926 		ret = wg_send(sc, e, m);
927 	}
928 out:
929 	if (ret)
930 		DPRINTF(sc, "Unable to send packet: %d\n", ret);
931 }
932 
933 /* Timers */
934 static void
935 wg_timers_enable(struct wg_peer *peer)
936 {
937 	atomic_store_bool(&peer->p_enabled, true);
938 	wg_timers_run_persistent_keepalive(peer);
939 }
940 
941 static void
942 wg_timers_disable(struct wg_peer *peer)
943 {
944 	/* By setting p_enabled = false, then calling NET_EPOCH_WAIT, we can be
945 	 * sure no new handshakes are created after the wait. This is because
946 	 * all callout_resets (scheduling the callout) are guarded by
947 	 * p_enabled. We can be sure all sections that read p_enabled and then
948 	 * optionally call callout_reset are finished as they are surrounded by
949 	 * NET_EPOCH_{ENTER,EXIT}.
950 	 *
951 	 * However, as new callouts may be scheduled during NET_EPOCH_WAIT (but
952 	 * not after), we stop all callouts leaving no callouts active.
953 	 *
954 	 * We should also pull NET_EPOCH_WAIT out of the FOREACH(peer) loops, but the
955 	 * performance impact is acceptable for the time being. */
956 	atomic_store_bool(&peer->p_enabled, false);
957 	NET_EPOCH_WAIT();
958 	atomic_store_bool(&peer->p_need_another_keepalive, false);
959 
960 	callout_stop(&peer->p_new_handshake);
961 	callout_stop(&peer->p_send_keepalive);
962 	callout_stop(&peer->p_retry_handshake);
963 	callout_stop(&peer->p_persistent_keepalive);
964 	callout_stop(&peer->p_zero_key_material);
965 }
966 
967 static void
968 wg_timers_set_persistent_keepalive(struct wg_peer *peer, uint16_t interval)
969 {
970 	struct epoch_tracker et;
971 	if (interval != peer->p_persistent_keepalive_interval) {
972 		atomic_store_16(&peer->p_persistent_keepalive_interval, interval);
973 		NET_EPOCH_ENTER(et);
974 		if (atomic_load_bool(&peer->p_enabled))
975 			wg_timers_run_persistent_keepalive(peer);
976 		NET_EPOCH_EXIT(et);
977 	}
978 }
979 
980 static void
981 wg_timers_get_last_handshake(struct wg_peer *peer, struct wg_timespec64 *time)
982 {
983 	mtx_lock(&peer->p_handshake_mtx);
984 	time->tv_sec = peer->p_handshake_complete.tv_sec;
985 	time->tv_nsec = peer->p_handshake_complete.tv_nsec;
986 	mtx_unlock(&peer->p_handshake_mtx);
987 }
988 
989 static void
990 wg_timers_event_data_sent(struct wg_peer *peer)
991 {
992 	struct epoch_tracker et;
993 	NET_EPOCH_ENTER(et);
994 	if (atomic_load_bool(&peer->p_enabled) &&
995 	    !callout_pending(&peer->p_new_handshake))
996 		callout_reset(&peer->p_new_handshake, MSEC_2_TICKS(
997 		    NEW_HANDSHAKE_TIMEOUT * 1000 +
998 		    arc4random_uniform(REKEY_TIMEOUT_JITTER)),
999 		    wg_timers_run_new_handshake, peer);
1000 	NET_EPOCH_EXIT(et);
1001 }
1002 
1003 static void
1004 wg_timers_event_data_received(struct wg_peer *peer)
1005 {
1006 	struct epoch_tracker et;
1007 	NET_EPOCH_ENTER(et);
1008 	if (atomic_load_bool(&peer->p_enabled)) {
1009 		if (!callout_pending(&peer->p_send_keepalive))
1010 			callout_reset(&peer->p_send_keepalive,
1011 			    MSEC_2_TICKS(KEEPALIVE_TIMEOUT * 1000),
1012 			    wg_timers_run_send_keepalive, peer);
1013 		else
1014 			atomic_store_bool(&peer->p_need_another_keepalive,
1015 			    true);
1016 	}
1017 	NET_EPOCH_EXIT(et);
1018 }
1019 
1020 static void
1021 wg_timers_event_any_authenticated_packet_sent(struct wg_peer *peer)
1022 {
1023 	callout_stop(&peer->p_send_keepalive);
1024 }
1025 
1026 static void
1027 wg_timers_event_any_authenticated_packet_received(struct wg_peer *peer)
1028 {
1029 	callout_stop(&peer->p_new_handshake);
1030 }
1031 
1032 static void
1033 wg_timers_event_any_authenticated_packet_traversal(struct wg_peer *peer)
1034 {
1035 	struct epoch_tracker et;
1036 	uint16_t interval;
1037 	NET_EPOCH_ENTER(et);
1038 	interval = atomic_load_16(&peer->p_persistent_keepalive_interval);
1039 	if (atomic_load_bool(&peer->p_enabled) && interval > 0)
1040 		callout_reset(&peer->p_persistent_keepalive,
1041 		     MSEC_2_TICKS(interval * 1000),
1042 		     wg_timers_run_persistent_keepalive, peer);
1043 	NET_EPOCH_EXIT(et);
1044 }
1045 
1046 static void
1047 wg_timers_event_handshake_initiated(struct wg_peer *peer)
1048 {
1049 	struct epoch_tracker et;
1050 	NET_EPOCH_ENTER(et);
1051 	if (atomic_load_bool(&peer->p_enabled))
1052 		callout_reset(&peer->p_retry_handshake, MSEC_2_TICKS(
1053 		    REKEY_TIMEOUT * 1000 +
1054 		    arc4random_uniform(REKEY_TIMEOUT_JITTER)),
1055 		    wg_timers_run_retry_handshake, peer);
1056 	NET_EPOCH_EXIT(et);
1057 }
1058 
1059 static void
1060 wg_timers_event_handshake_complete(struct wg_peer *peer)
1061 {
1062 	struct epoch_tracker et;
1063 	NET_EPOCH_ENTER(et);
1064 	if (atomic_load_bool(&peer->p_enabled)) {
1065 		mtx_lock(&peer->p_handshake_mtx);
1066 		callout_stop(&peer->p_retry_handshake);
1067 		peer->p_handshake_retries = 0;
1068 		getnanotime(&peer->p_handshake_complete);
1069 		mtx_unlock(&peer->p_handshake_mtx);
1070 		wg_timers_run_send_keepalive(peer);
1071 	}
1072 	NET_EPOCH_EXIT(et);
1073 }
1074 
1075 static void
1076 wg_timers_event_session_derived(struct wg_peer *peer)
1077 {
1078 	struct epoch_tracker et;
1079 	NET_EPOCH_ENTER(et);
1080 	if (atomic_load_bool(&peer->p_enabled))
1081 		callout_reset(&peer->p_zero_key_material,
1082 		    MSEC_2_TICKS(REJECT_AFTER_TIME * 3 * 1000),
1083 		    wg_timers_run_zero_key_material, peer);
1084 	NET_EPOCH_EXIT(et);
1085 }
1086 
1087 static void
1088 wg_timers_event_want_initiation(struct wg_peer *peer)
1089 {
1090 	struct epoch_tracker et;
1091 	NET_EPOCH_ENTER(et);
1092 	if (atomic_load_bool(&peer->p_enabled))
1093 		wg_timers_run_send_initiation(peer, false);
1094 	NET_EPOCH_EXIT(et);
1095 }
1096 
1097 static void
1098 wg_timers_run_send_initiation(struct wg_peer *peer, bool is_retry)
1099 {
1100 	if (!is_retry)
1101 		peer->p_handshake_retries = 0;
1102 	if (noise_remote_initiation_expired(peer->p_remote) == ETIMEDOUT)
1103 		wg_send_initiation(peer);
1104 }
1105 
1106 static void
1107 wg_timers_run_retry_handshake(void *_peer)
1108 {
1109 	struct epoch_tracker et;
1110 	struct wg_peer *peer = _peer;
1111 
1112 	mtx_lock(&peer->p_handshake_mtx);
1113 	if (peer->p_handshake_retries <= MAX_TIMER_HANDSHAKES) {
1114 		peer->p_handshake_retries++;
1115 		mtx_unlock(&peer->p_handshake_mtx);
1116 
1117 		DPRINTF(peer->p_sc, "Handshake for peer %" PRIu64 " did not complete "
1118 		    "after %d seconds, retrying (try %d)\n", peer->p_id,
1119 		    REKEY_TIMEOUT, peer->p_handshake_retries + 1);
1120 		wg_peer_clear_src(peer);
1121 		wg_timers_run_send_initiation(peer, true);
1122 	} else {
1123 		mtx_unlock(&peer->p_handshake_mtx);
1124 
1125 		DPRINTF(peer->p_sc, "Handshake for peer %" PRIu64 " did not complete "
1126 		    "after %d retries, giving up\n", peer->p_id,
1127 		    MAX_TIMER_HANDSHAKES + 2);
1128 
1129 		callout_stop(&peer->p_send_keepalive);
1130 		wg_queue_purge(&peer->p_stage_queue);
1131 		NET_EPOCH_ENTER(et);
1132 		if (atomic_load_bool(&peer->p_enabled) &&
1133 		    !callout_pending(&peer->p_zero_key_material))
1134 			callout_reset(&peer->p_zero_key_material,
1135 			    MSEC_2_TICKS(REJECT_AFTER_TIME * 3 * 1000),
1136 			    wg_timers_run_zero_key_material, peer);
1137 		NET_EPOCH_EXIT(et);
1138 	}
1139 }
1140 
1141 static void
1142 wg_timers_run_send_keepalive(void *_peer)
1143 {
1144 	struct epoch_tracker et;
1145 	struct wg_peer *peer = _peer;
1146 
1147 	wg_send_keepalive(peer);
1148 	NET_EPOCH_ENTER(et);
1149 	if (atomic_load_bool(&peer->p_enabled) &&
1150 	    atomic_load_bool(&peer->p_need_another_keepalive)) {
1151 		atomic_store_bool(&peer->p_need_another_keepalive, false);
1152 		callout_reset(&peer->p_send_keepalive,
1153 		    MSEC_2_TICKS(KEEPALIVE_TIMEOUT * 1000),
1154 		    wg_timers_run_send_keepalive, peer);
1155 	}
1156 	NET_EPOCH_EXIT(et);
1157 }
1158 
1159 static void
1160 wg_timers_run_new_handshake(void *_peer)
1161 {
1162 	struct wg_peer *peer = _peer;
1163 
1164 	DPRINTF(peer->p_sc, "Retrying handshake with peer %" PRIu64 " because we "
1165 	    "stopped hearing back after %d seconds\n",
1166 	    peer->p_id, NEW_HANDSHAKE_TIMEOUT);
1167 
1168 	wg_peer_clear_src(peer);
1169 	wg_timers_run_send_initiation(peer, false);
1170 }
1171 
1172 static void
1173 wg_timers_run_zero_key_material(void *_peer)
1174 {
1175 	struct wg_peer *peer = _peer;
1176 
1177 	DPRINTF(peer->p_sc, "Zeroing out keys for peer %" PRIu64 ", since we "
1178 	    "haven't received a new one in %d seconds\n",
1179 	    peer->p_id, REJECT_AFTER_TIME * 3);
1180 	noise_remote_keypairs_clear(peer->p_remote);
1181 }
1182 
1183 static void
1184 wg_timers_run_persistent_keepalive(void *_peer)
1185 {
1186 	struct wg_peer *peer = _peer;
1187 
1188 	if (atomic_load_16(&peer->p_persistent_keepalive_interval) > 0)
1189 		wg_send_keepalive(peer);
1190 }
1191 
1192 /* TODO Handshake */
1193 static void
1194 wg_peer_send_buf(struct wg_peer *peer, uint8_t *buf, size_t len)
1195 {
1196 	struct wg_endpoint endpoint;
1197 
1198 	counter_u64_add(peer->p_tx_bytes, len);
1199 	wg_timers_event_any_authenticated_packet_traversal(peer);
1200 	wg_timers_event_any_authenticated_packet_sent(peer);
1201 	wg_peer_get_endpoint(peer, &endpoint);
1202 	wg_send_buf(peer->p_sc, &endpoint, buf, len);
1203 }
1204 
1205 static void
1206 wg_send_initiation(struct wg_peer *peer)
1207 {
1208 	struct wg_pkt_initiation pkt;
1209 
1210 	if (noise_create_initiation(peer->p_remote, &pkt.s_idx, pkt.ue,
1211 	    pkt.es, pkt.ets) != 0)
1212 		return;
1213 
1214 	DPRINTF(peer->p_sc, "Sending handshake initiation to peer %" PRIu64 "\n", peer->p_id);
1215 
1216 	pkt.t = WG_PKT_INITIATION;
1217 	cookie_maker_mac(&peer->p_cookie, &pkt.m, &pkt,
1218 	    sizeof(pkt) - sizeof(pkt.m));
1219 	wg_peer_send_buf(peer, (uint8_t *)&pkt, sizeof(pkt));
1220 	wg_timers_event_handshake_initiated(peer);
1221 }
1222 
1223 static void
1224 wg_send_response(struct wg_peer *peer)
1225 {
1226 	struct wg_pkt_response pkt;
1227 
1228 	if (noise_create_response(peer->p_remote, &pkt.s_idx, &pkt.r_idx,
1229 	    pkt.ue, pkt.en) != 0)
1230 		return;
1231 
1232 	DPRINTF(peer->p_sc, "Sending handshake response to peer %" PRIu64 "\n", peer->p_id);
1233 
1234 	wg_timers_event_session_derived(peer);
1235 	pkt.t = WG_PKT_RESPONSE;
1236 	cookie_maker_mac(&peer->p_cookie, &pkt.m, &pkt,
1237 	     sizeof(pkt)-sizeof(pkt.m));
1238 	wg_peer_send_buf(peer, (uint8_t*)&pkt, sizeof(pkt));
1239 }
1240 
1241 static void
1242 wg_send_cookie(struct wg_softc *sc, struct cookie_macs *cm, uint32_t idx,
1243     struct wg_endpoint *e)
1244 {
1245 	struct wg_pkt_cookie	pkt;
1246 
1247 	DPRINTF(sc, "Sending cookie response for denied handshake message\n");
1248 
1249 	pkt.t = WG_PKT_COOKIE;
1250 	pkt.r_idx = idx;
1251 
1252 	cookie_checker_create_payload(&sc->sc_cookie, cm, pkt.nonce,
1253 	    pkt.ec, &e->e_remote.r_sa);
1254 	wg_send_buf(sc, e, (uint8_t *)&pkt, sizeof(pkt));
1255 }
1256 
1257 static void
1258 wg_send_keepalive(struct wg_peer *peer)
1259 {
1260 	struct wg_packet *pkt;
1261 	struct mbuf *m;
1262 
1263 	if (wg_queue_len(&peer->p_stage_queue) > 0)
1264 		goto send;
1265 	if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
1266 		return;
1267 	if ((pkt = wg_packet_alloc(m)) == NULL) {
1268 		m_freem(m);
1269 		return;
1270 	}
1271 	wg_queue_push_staged(&peer->p_stage_queue, pkt);
1272 	DPRINTF(peer->p_sc, "Sending keepalive packet to peer %" PRIu64 "\n", peer->p_id);
1273 send:
1274 	wg_peer_send_staged(peer);
1275 }
1276 
1277 static void
1278 wg_handshake(struct wg_softc *sc, struct wg_packet *pkt)
1279 {
1280 	struct wg_pkt_initiation	*init;
1281 	struct wg_pkt_response		*resp;
1282 	struct wg_pkt_cookie		*cook;
1283 	struct wg_endpoint		*e;
1284 	struct wg_peer			*peer;
1285 	struct mbuf			*m;
1286 	struct noise_remote		*remote = NULL;
1287 	int				 res;
1288 	bool				 underload = false;
1289 	static sbintime_t		 wg_last_underload; /* sbinuptime */
1290 
1291 	underload = wg_queue_len(&sc->sc_handshake_queue) >= MAX_QUEUED_HANDSHAKES / 8;
1292 	if (underload) {
1293 		wg_last_underload = getsbinuptime();
1294 	} else if (wg_last_underload) {
1295 		underload = wg_last_underload + UNDERLOAD_TIMEOUT * SBT_1S > getsbinuptime();
1296 		if (!underload)
1297 			wg_last_underload = 0;
1298 	}
1299 
1300 	m = pkt->p_mbuf;
1301 	e = &pkt->p_endpoint;
1302 
1303 	if ((pkt->p_mbuf = m = m_pullup(m, m->m_pkthdr.len)) == NULL)
1304 		goto error;
1305 
1306 	switch (*mtod(m, uint32_t *)) {
1307 	case WG_PKT_INITIATION:
1308 		init = mtod(m, struct wg_pkt_initiation *);
1309 
1310 		res = cookie_checker_validate_macs(&sc->sc_cookie, &init->m,
1311 				init, sizeof(*init) - sizeof(init->m),
1312 				underload, &e->e_remote.r_sa,
1313 				if_getvnet(sc->sc_ifp));
1314 
1315 		if (res == EINVAL) {
1316 			DPRINTF(sc, "Invalid initiation MAC\n");
1317 			goto error;
1318 		} else if (res == ECONNREFUSED) {
1319 			DPRINTF(sc, "Handshake ratelimited\n");
1320 			goto error;
1321 		} else if (res == EAGAIN) {
1322 			wg_send_cookie(sc, &init->m, init->s_idx, e);
1323 			goto error;
1324 		} else if (res != 0) {
1325 			panic("unexpected response: %d\n", res);
1326 		}
1327 
1328 		if (noise_consume_initiation(sc->sc_local, &remote,
1329 		    init->s_idx, init->ue, init->es, init->ets) != 0) {
1330 			DPRINTF(sc, "Invalid handshake initiation\n");
1331 			goto error;
1332 		}
1333 
1334 		peer = noise_remote_arg(remote);
1335 
1336 		DPRINTF(sc, "Receiving handshake initiation from peer %" PRIu64 "\n", peer->p_id);
1337 
1338 		wg_peer_set_endpoint(peer, e);
1339 		wg_send_response(peer);
1340 		break;
1341 	case WG_PKT_RESPONSE:
1342 		resp = mtod(m, struct wg_pkt_response *);
1343 
1344 		res = cookie_checker_validate_macs(&sc->sc_cookie, &resp->m,
1345 				resp, sizeof(*resp) - sizeof(resp->m),
1346 				underload, &e->e_remote.r_sa,
1347 				if_getvnet(sc->sc_ifp));
1348 
1349 		if (res == EINVAL) {
1350 			DPRINTF(sc, "Invalid response MAC\n");
1351 			goto error;
1352 		} else if (res == ECONNREFUSED) {
1353 			DPRINTF(sc, "Handshake ratelimited\n");
1354 			goto error;
1355 		} else if (res == EAGAIN) {
1356 			wg_send_cookie(sc, &resp->m, resp->s_idx, e);
1357 			goto error;
1358 		} else if (res != 0) {
1359 			panic("unexpected response: %d\n", res);
1360 		}
1361 
1362 		if (noise_consume_response(sc->sc_local, &remote,
1363 		    resp->s_idx, resp->r_idx, resp->ue, resp->en) != 0) {
1364 			DPRINTF(sc, "Invalid handshake response\n");
1365 			goto error;
1366 		}
1367 
1368 		peer = noise_remote_arg(remote);
1369 		DPRINTF(sc, "Receiving handshake response from peer %" PRIu64 "\n", peer->p_id);
1370 
1371 		wg_peer_set_endpoint(peer, e);
1372 		wg_timers_event_session_derived(peer);
1373 		wg_timers_event_handshake_complete(peer);
1374 		break;
1375 	case WG_PKT_COOKIE:
1376 		cook = mtod(m, struct wg_pkt_cookie *);
1377 
1378 		if ((remote = noise_remote_index(sc->sc_local, cook->r_idx)) == NULL) {
1379 			DPRINTF(sc, "Unknown cookie index\n");
1380 			goto error;
1381 		}
1382 
1383 		peer = noise_remote_arg(remote);
1384 
1385 		if (cookie_maker_consume_payload(&peer->p_cookie,
1386 		    cook->nonce, cook->ec) == 0) {
1387 			DPRINTF(sc, "Receiving cookie response\n");
1388 		} else {
1389 			DPRINTF(sc, "Could not decrypt cookie response\n");
1390 			goto error;
1391 		}
1392 
1393 		goto not_authenticated;
1394 	default:
1395 		panic("invalid packet in handshake queue");
1396 	}
1397 
1398 	wg_timers_event_any_authenticated_packet_received(peer);
1399 	wg_timers_event_any_authenticated_packet_traversal(peer);
1400 
1401 not_authenticated:
1402 	counter_u64_add(peer->p_rx_bytes, m->m_pkthdr.len);
1403 	if_inc_counter(sc->sc_ifp, IFCOUNTER_IPACKETS, 1);
1404 	if_inc_counter(sc->sc_ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
1405 error:
1406 	if (remote != NULL)
1407 		noise_remote_put(remote);
1408 	wg_packet_free(pkt);
1409 }
1410 
1411 static void
1412 wg_softc_handshake_receive(struct wg_softc *sc)
1413 {
1414 	struct wg_packet *pkt;
1415 	while ((pkt = wg_queue_dequeue_handshake(&sc->sc_handshake_queue)) != NULL)
1416 		wg_handshake(sc, pkt);
1417 }
1418 
1419 static void
1420 wg_mbuf_reset(struct mbuf *m)
1421 {
1422 
1423 	struct m_tag *t, *tmp;
1424 
1425 	/*
1426 	 * We want to reset the mbuf to a newly allocated state, containing
1427 	 * just the packet contents. Unfortunately FreeBSD doesn't seem to
1428 	 * offer this anywhere, so we have to make it up as we go. If we can
1429 	 * get this in kern/kern_mbuf.c, that would be best.
1430 	 *
1431 	 * Notice: this may break things unexpectedly but it is better to fail
1432 	 *         closed in the extreme case than leak information in every
1433 	 *         case.
1434 	 *
1435 	 * With that said, all this attempts to do is remove any extraneous
1436 	 * information that could be present.
1437 	 */
1438 
1439 	M_ASSERTPKTHDR(m);
1440 
1441 	m->m_flags &= ~(M_BCAST|M_MCAST|M_VLANTAG|M_PROMISC|M_PROTOFLAGS);
1442 
1443 	M_HASHTYPE_CLEAR(m);
1444 #ifdef NUMA
1445 	m->m_pkthdr.numa_domain = M_NODOM;
1446 #endif
1447 	SLIST_FOREACH_SAFE(t, &m->m_pkthdr.tags, m_tag_link, tmp) {
1448 		if ((t->m_tag_id != 0 || t->m_tag_cookie != MTAG_WGLOOP) &&
1449 		    t->m_tag_id != PACKET_TAG_MACLABEL)
1450 			m_tag_delete(m, t);
1451 	}
1452 
1453 	KASSERT((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0,
1454 	    ("%s: mbuf %p has a send tag", __func__, m));
1455 
1456 	m->m_pkthdr.csum_flags = 0;
1457 	m->m_pkthdr.PH_per.sixtyfour[0] = 0;
1458 	m->m_pkthdr.PH_loc.sixtyfour[0] = 0;
1459 }
1460 
1461 static inline unsigned int
1462 calculate_padding(struct wg_packet *pkt)
1463 {
1464 	unsigned int padded_size, last_unit = pkt->p_mbuf->m_pkthdr.len;
1465 
1466 	/* Keepalive packets don't set p_mtu, but also have a length of zero. */
1467 	if (__predict_false(pkt->p_mtu == 0)) {
1468 		padded_size = (last_unit + (WG_PKT_PADDING - 1)) &
1469 		    ~(WG_PKT_PADDING - 1);
1470 		return (padded_size - last_unit);
1471 	}
1472 
1473 	if (__predict_false(last_unit > pkt->p_mtu))
1474 		last_unit %= pkt->p_mtu;
1475 
1476 	padded_size = (last_unit + (WG_PKT_PADDING - 1)) & ~(WG_PKT_PADDING - 1);
1477 	if (pkt->p_mtu < padded_size)
1478 		padded_size = pkt->p_mtu;
1479 	return (padded_size - last_unit);
1480 }
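/*
 * Worked example: with WG_PKT_PADDING == 16 and an MTU of 1420, a 1414-byte
 * plaintext rounds up to 1424 but is capped at p_mtu (1420), so 6 bytes of
 * zero padding are appended; a 1408-byte plaintext is already a multiple of
 * 16 and gets none.
 */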
1481 
1482 static void
1483 wg_encrypt(struct wg_softc *sc, struct wg_packet *pkt)
1484 {
1485 	static const uint8_t	 padding[WG_PKT_PADDING] = { 0 };
1486 	struct wg_pkt_data	*data;
1487 	struct wg_peer		*peer;
1488 	struct noise_remote	*remote;
1489 	struct mbuf		*m;
1490 	uint32_t		 idx;
1491 	unsigned int		 padlen;
1492 	enum wg_ring_state	 state = WG_PACKET_DEAD;
1493 
1494 	remote = noise_keypair_remote(pkt->p_keypair);
1495 	peer = noise_remote_arg(remote);
1496 	m = pkt->p_mbuf;
1497 
1498 	/* Pad the packet */
1499 	padlen = calculate_padding(pkt);
1500 	if (padlen != 0 && !m_append(m, padlen, padding))
1501 		goto out;
1502 
1503 	/* Do encryption */
1504 	if (noise_keypair_encrypt(pkt->p_keypair, &idx, pkt->p_nonce, m) != 0)
1505 		goto out;
1506 
1507 	/* Put header into packet */
1508 	M_PREPEND(m, sizeof(struct wg_pkt_data), M_NOWAIT);
1509 	if (m == NULL)
1510 		goto out;
1511 	data = mtod(m, struct wg_pkt_data *);
1512 	data->t = WG_PKT_DATA;
1513 	data->r_idx = idx;
1514 	data->nonce = htole64(pkt->p_nonce);
1515 
1516 	wg_mbuf_reset(m);
1517 	state = WG_PACKET_CRYPTED;
1518 out:
1519 	pkt->p_mbuf = m;
1520 	atomic_store_rel_int(&pkt->p_state, state);
1521 	GROUPTASK_ENQUEUE(&peer->p_send);
1522 	noise_remote_put(remote);
1523 }
1524 
1525 static void
1526 wg_decrypt(struct wg_softc *sc, struct wg_packet *pkt)
1527 {
1528 	struct wg_peer		*peer, *allowed_peer;
1529 	struct noise_remote	*remote;
1530 	struct mbuf		*m;
1531 	int			 len;
1532 	enum wg_ring_state	 state = WG_PACKET_DEAD;
1533 
1534 	remote = noise_keypair_remote(pkt->p_keypair);
1535 	peer = noise_remote_arg(remote);
1536 	m = pkt->p_mbuf;
1537 
1538 	/* Read nonce and then adjust to remove the header. */
1539 	pkt->p_nonce = le64toh(mtod(m, struct wg_pkt_data *)->nonce);
1540 	m_adj(m, sizeof(struct wg_pkt_data));
1541 
1542 	if (noise_keypair_decrypt(pkt->p_keypair, pkt->p_nonce, m) != 0)
1543 		goto out;
1544 
1545 	/* A packet with length 0 is a keepalive packet */
1546 	if (__predict_false(m->m_pkthdr.len == 0)) {
1547 		DPRINTF(sc, "Receiving keepalive packet from peer "
1548 		    "%" PRIu64 "\n", peer->p_id);
1549 		state = WG_PACKET_CRYPTED;
1550 		goto out;
1551 	}
1552 
1553 	/*
1554 	 * We can let the network stack handle the intricate validation of the
1555 	 * IP header; we just check the size and the version so that we can
1556 	 * read the source address in wg_aip_lookup.
1557 	 */
1558 
1559 	if (determine_af_and_pullup(&m, &pkt->p_af) == 0) {
1560 		if (pkt->p_af == AF_INET) {
1561 			struct ip *ip = mtod(m, struct ip *);
1562 			allowed_peer = wg_aip_lookup(sc, AF_INET, &ip->ip_src);
1563 			len = ntohs(ip->ip_len);
1564 			if (len >= sizeof(struct ip) && len < m->m_pkthdr.len)
1565 				m_adj(m, len - m->m_pkthdr.len);
1566 		} else if (pkt->p_af == AF_INET6) {
1567 			struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
1568 			allowed_peer = wg_aip_lookup(sc, AF_INET6, &ip6->ip6_src);
1569 			len = ntohs(ip6->ip6_plen) + sizeof(struct ip6_hdr);
1570 			if (len < m->m_pkthdr.len)
1571 				m_adj(m, len - m->m_pkthdr.len);
1572 		} else
1573 			panic("determine_af_and_pullup returned unexpected value");
1574 	} else {
1575 		DPRINTF(sc, "Packet is neither ipv4 nor ipv6 from peer %" PRIu64 "\n", peer->p_id);
1576 		goto out;
1577 	}
1578 
1579 	/* We only want to compare the address, not dereference, so drop the ref. */
1580 	if (allowed_peer != NULL)
1581 		noise_remote_put(allowed_peer->p_remote);
1582 
1583 	if (__predict_false(peer != allowed_peer)) {
1584 		DPRINTF(sc, "Packet has unallowed src IP from peer %" PRIu64 "\n", peer->p_id);
1585 		goto out;
1586 	}
1587 
1588 	wg_mbuf_reset(m);
1589 	state = WG_PACKET_CRYPTED;
1590 out:
1591 	pkt->p_mbuf = m;
1592 	atomic_store_rel_int(&pkt->p_state, state);
1593 	GROUPTASK_ENQUEUE(&peer->p_recv);
1594 	noise_remote_put(remote);
1595 }
1596 
1597 static void
1598 wg_softc_decrypt(struct wg_softc *sc)
1599 {
1600 	struct wg_packet *pkt;
1601 
1602 	while ((pkt = wg_queue_dequeue_parallel(&sc->sc_decrypt_parallel)) != NULL)
1603 		wg_decrypt(sc, pkt);
1604 }
1605 
1606 static void
1607 wg_softc_encrypt(struct wg_softc *sc)
1608 {
1609 	struct wg_packet *pkt;
1610 
1611 	while ((pkt = wg_queue_dequeue_parallel(&sc->sc_encrypt_parallel)) != NULL)
1612 		wg_encrypt(sc, pkt);
1613 }
1614 
1615 static void
1616 wg_encrypt_dispatch(struct wg_softc *sc)
1617 {
1618 	/*
1619 	 * The update to encrypt_last_cpu is racy, such that we may
1620 	 * reschedule the task for the same CPU multiple times, but
1621 	 * the race doesn't really matter.
1622 	 */
1623 	u_int cpu = (sc->sc_encrypt_last_cpu + 1) % mp_ncpus;
1624 	sc->sc_encrypt_last_cpu = cpu;
1625 	GROUPTASK_ENQUEUE(&sc->sc_encrypt[cpu]);
1626 }
1627 
1628 static void
1629 wg_decrypt_dispatch(struct wg_softc *sc)
1630 {
1631 	u_int cpu = (sc->sc_decrypt_last_cpu + 1) % mp_ncpus;
1632 	sc->sc_decrypt_last_cpu = cpu;
1633 	GROUPTASK_ENQUEUE(&sc->sc_decrypt[cpu]);
1634 }
1635 
1636 static void
1637 wg_deliver_out(struct wg_peer *peer)
1638 {
1639 	struct wg_endpoint	 endpoint;
1640 	struct wg_softc		*sc = peer->p_sc;
1641 	struct wg_packet	*pkt;
1642 	struct mbuf		*m;
1643 	int			 rc, len;
1644 
1645 	wg_peer_get_endpoint(peer, &endpoint);
1646 
1647 	while ((pkt = wg_queue_dequeue_serial(&peer->p_encrypt_serial)) != NULL) {
1648 		if (atomic_load_acq_int(&pkt->p_state) != WG_PACKET_CRYPTED)
1649 			goto error;
1650 
1651 		m = pkt->p_mbuf;
1652 		pkt->p_mbuf = NULL;
1653 
1654 		len = m->m_pkthdr.len;
1655 
1656 		wg_timers_event_any_authenticated_packet_traversal(peer);
1657 		wg_timers_event_any_authenticated_packet_sent(peer);
1658 		rc = wg_send(sc, &endpoint, m);
1659 		if (rc == 0) {
1660 			if (len > (sizeof(struct wg_pkt_data) + NOISE_AUTHTAG_LEN))
1661 				wg_timers_event_data_sent(peer);
1662 			counter_u64_add(peer->p_tx_bytes, len);
1663 		} else if (rc == EADDRNOTAVAIL) {
1664 			wg_peer_clear_src(peer);
1665 			wg_peer_get_endpoint(peer, &endpoint);
1666 			goto error;
1667 		} else {
1668 			goto error;
1669 		}
1670 		wg_packet_free(pkt);
1671 		if (noise_keep_key_fresh_send(peer->p_remote))
1672 			wg_timers_event_want_initiation(peer);
1673 		continue;
1674 error:
1675 		if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1);
1676 		wg_packet_free(pkt);
1677 	}
1678 }
1679 
1680 static void
1681 wg_deliver_in(struct wg_peer *peer)
1682 {
1683 	struct wg_softc		*sc = peer->p_sc;
1684 	if_t			 ifp = sc->sc_ifp;
1685 	struct wg_packet	*pkt;
1686 	struct mbuf		*m;
1687 	struct epoch_tracker	 et;
1688 
1689 	while ((pkt = wg_queue_dequeue_serial(&peer->p_decrypt_serial)) != NULL) {
1690 		if (atomic_load_acq_int(&pkt->p_state) != WG_PACKET_CRYPTED)
1691 			goto error;
1692 
1693 		m = pkt->p_mbuf;
1694 		if (noise_keypair_nonce_check(pkt->p_keypair, pkt->p_nonce) != 0)
1695 			goto error;
1696 
1697 		if (noise_keypair_received_with(pkt->p_keypair) == ECONNRESET)
1698 			wg_timers_event_handshake_complete(peer);
1699 
1700 		wg_timers_event_any_authenticated_packet_received(peer);
1701 		wg_timers_event_any_authenticated_packet_traversal(peer);
1702 		wg_peer_set_endpoint(peer, &pkt->p_endpoint);
1703 
1704 		counter_u64_add(peer->p_rx_bytes, m->m_pkthdr.len +
1705 		    sizeof(struct wg_pkt_data) + NOISE_AUTHTAG_LEN);
1706 		if_inc_counter(sc->sc_ifp, IFCOUNTER_IPACKETS, 1);
1707 		if_inc_counter(sc->sc_ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len +
1708 		    sizeof(struct wg_pkt_data) + NOISE_AUTHTAG_LEN);
1709 
1710 		if (m->m_pkthdr.len == 0)
1711 			goto done;
1712 
1713 		MPASS(pkt->p_af == AF_INET || pkt->p_af == AF_INET6);
1714 		pkt->p_mbuf = NULL;
1715 
1716 		m->m_pkthdr.rcvif = ifp;
1717 
1718 		NET_EPOCH_ENTER(et);
1719 		BPF_MTAP2_AF(ifp, m, pkt->p_af);
1720 
1721 		CURVNET_SET(if_getvnet(ifp));
1722 		M_SETFIB(m, if_getfib(ifp));
1723 		if (pkt->p_af == AF_INET)
1724 			netisr_dispatch(NETISR_IP, m);
1725 		if (pkt->p_af == AF_INET6)
1726 			netisr_dispatch(NETISR_IPV6, m);
1727 		CURVNET_RESTORE();
1728 		NET_EPOCH_EXIT(et);
1729 
1730 		wg_timers_event_data_received(peer);
1731 
1732 done:
1733 		if (noise_keep_key_fresh_recv(peer->p_remote))
1734 			wg_timers_event_want_initiation(peer);
1735 		wg_packet_free(pkt);
1736 		continue;
1737 error:
1738 		if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1739 		wg_packet_free(pkt);
1740 	}
1741 }
1742 
1743 static struct wg_packet *
1744 wg_packet_alloc(struct mbuf *m)
1745 {
1746 	struct wg_packet *pkt;
1747 
1748 	if ((pkt = uma_zalloc(wg_packet_zone, M_NOWAIT | M_ZERO)) == NULL)
1749 		return (NULL);
1750 	pkt->p_mbuf = m;
1751 	return (pkt);
1752 }
1753 
1754 static void
1755 wg_packet_free(struct wg_packet *pkt)
1756 {
1757 	if (pkt->p_keypair != NULL)
1758 		noise_keypair_put(pkt->p_keypair);
1759 	if (pkt->p_mbuf != NULL)
1760 		m_freem(pkt->p_mbuf);
1761 	uma_zfree(wg_packet_zone, pkt);
1762 }
1763 
1764 static void
1765 wg_queue_init(struct wg_queue *queue, const char *name)
1766 {
1767 	mtx_init(&queue->q_mtx, name, NULL, MTX_DEF);
1768 	STAILQ_INIT(&queue->q_queue);
1769 	queue->q_len = 0;
1770 }
1771 
1772 static void
1773 wg_queue_deinit(struct wg_queue *queue)
1774 {
1775 	wg_queue_purge(queue);
1776 	mtx_destroy(&queue->q_mtx);
1777 }
1778 
1779 static size_t
1780 wg_queue_len(struct wg_queue *queue)
1781 {
1782 	return (queue->q_len);
1783 }
1784 
1785 static int
1786 wg_queue_enqueue_handshake(struct wg_queue *hs, struct wg_packet *pkt)
1787 {
1788 	int ret = 0;
1789 	mtx_lock(&hs->q_mtx);
1790 	if (hs->q_len < MAX_QUEUED_HANDSHAKES) {
1791 		STAILQ_INSERT_TAIL(&hs->q_queue, pkt, p_parallel);
1792 		hs->q_len++;
1793 	} else {
1794 		ret = ENOBUFS;
1795 	}
1796 	mtx_unlock(&hs->q_mtx);
1797 	if (ret != 0)
1798 		wg_packet_free(pkt);
1799 	return (ret);
1800 }
1801 
1802 static struct wg_packet *
1803 wg_queue_dequeue_handshake(struct wg_queue *hs)
1804 {
1805 	struct wg_packet *pkt;
1806 	mtx_lock(&hs->q_mtx);
1807 	if ((pkt = STAILQ_FIRST(&hs->q_queue)) != NULL) {
1808 		STAILQ_REMOVE_HEAD(&hs->q_queue, p_parallel);
1809 		hs->q_len--;
1810 	}
1811 	mtx_unlock(&hs->q_mtx);
1812 	return (pkt);
1813 }
1814 
1815 static void
1816 wg_queue_push_staged(struct wg_queue *staged, struct wg_packet *pkt)
1817 {
1818 	struct wg_packet *old = NULL;
1819 
1820 	mtx_lock(&staged->q_mtx);
1821 	if (staged->q_len >= MAX_STAGED_PKT) {
1822 		old = STAILQ_FIRST(&staged->q_queue);
1823 		STAILQ_REMOVE_HEAD(&staged->q_queue, p_parallel);
1824 		staged->q_len--;
1825 	}
1826 	STAILQ_INSERT_TAIL(&staged->q_queue, pkt, p_parallel);
1827 	staged->q_len++;
1828 	mtx_unlock(&staged->q_mtx);
1829 
1830 	if (old != NULL)
1831 		wg_packet_free(old);
1832 }
1833 
1834 static void
1835 wg_queue_enlist_staged(struct wg_queue *staged, struct wg_packet_list *list)
1836 {
1837 	struct wg_packet *pkt, *tpkt;
1838 	STAILQ_FOREACH_SAFE(pkt, list, p_parallel, tpkt)
1839 		wg_queue_push_staged(staged, pkt);
1840 }
1841 
1842 static void
1843 wg_queue_delist_staged(struct wg_queue *staged, struct wg_packet_list *list)
1844 {
1845 	STAILQ_INIT(list);
1846 	mtx_lock(&staged->q_mtx);
1847 	STAILQ_CONCAT(list, &staged->q_queue);
1848 	staged->q_len = 0;
1849 	mtx_unlock(&staged->q_mtx);
1850 }
1851 
1852 static void
1853 wg_queue_purge(struct wg_queue *staged)
1854 {
1855 	struct wg_packet_list list;
1856 	struct wg_packet *pkt, *tpkt;
1857 	wg_queue_delist_staged(staged, &list);
1858 	STAILQ_FOREACH_SAFE(pkt, &list, p_parallel, tpkt)
1859 		wg_packet_free(pkt);
1860 }
1861 
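/*
 * Every data packet is queued on two lists at once: the peer's serial queue,
 * which preserves per-peer ordering for delivery, and the softc's parallel
 * queue, which the per-CPU crypto tasks drain.  If the serial queue is full
 * the packet is freed immediately; if the parallel queue is full the packet
 * is marked dead and reaped when it is dequeued serially.
 */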
1862 static int
1863 wg_queue_both(struct wg_queue *parallel, struct wg_queue *serial, struct wg_packet *pkt)
1864 {
1865 	pkt->p_state = WG_PACKET_UNCRYPTED;
1866 
1867 	mtx_lock(&serial->q_mtx);
1868 	if (serial->q_len < MAX_QUEUED_PKT) {
1869 		serial->q_len++;
1870 		STAILQ_INSERT_TAIL(&serial->q_queue, pkt, p_serial);
1871 	} else {
1872 		mtx_unlock(&serial->q_mtx);
1873 		wg_packet_free(pkt);
1874 		return (ENOBUFS);
1875 	}
1876 	mtx_unlock(&serial->q_mtx);
1877 
1878 	mtx_lock(&parallel->q_mtx);
1879 	if (parallel->q_len < MAX_QUEUED_PKT) {
1880 		parallel->q_len++;
1881 		STAILQ_INSERT_TAIL(&parallel->q_queue, pkt, p_parallel);
1882 	} else {
1883 		mtx_unlock(&parallel->q_mtx);
1884 		pkt->p_state = WG_PACKET_DEAD;
1885 		return (ENOBUFS);
1886 	}
1887 	mtx_unlock(&parallel->q_mtx);
1888 
1889 	return (0);
1890 }
1891 
1892 static struct wg_packet *
1893 wg_queue_dequeue_serial(struct wg_queue *serial)
1894 {
1895 	struct wg_packet *pkt = NULL;
1896 	mtx_lock(&serial->q_mtx);
1897 	if (serial->q_len > 0 && STAILQ_FIRST(&serial->q_queue)->p_state != WG_PACKET_UNCRYPTED) {
1898 		serial->q_len--;
1899 		pkt = STAILQ_FIRST(&serial->q_queue);
1900 		STAILQ_REMOVE_HEAD(&serial->q_queue, p_serial);
1901 	}
1902 	mtx_unlock(&serial->q_mtx);
1903 	return (pkt);
1904 }
1905 
1906 static struct wg_packet *
1907 wg_queue_dequeue_parallel(struct wg_queue *parallel)
1908 {
1909 	struct wg_packet *pkt = NULL;
1910 	mtx_lock(&parallel->q_mtx);
1911 	if (parallel->q_len > 0) {
1912 		parallel->q_len--;
1913 		pkt = STAILQ_FIRST(&parallel->q_queue);
1914 		STAILQ_REMOVE_HEAD(&parallel->q_queue, p_parallel);
1915 	}
1916 	mtx_unlock(&parallel->q_mtx);
1917 	return (pkt);
1918 }
1919 
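/*
 * wg_input: handler for UDP datagrams arriving on the tunnel socket.  Strips
 * the UDP header, records the remote/local addresses, and classifies the
 * packet by its leading 32-bit type word: handshake packets go to the
 * handshake taskqueue, data packets to the decryption queues of the peer
 * that owns the receiving index.
 */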
1920 static bool
1921 wg_input(struct mbuf *m, int offset, struct inpcb *inpcb,
1922     const struct sockaddr *sa, void *_sc)
1923 {
1924 #ifdef INET
1925 	const struct sockaddr_in	*sin;
1926 #endif
1927 #ifdef INET6
1928 	const struct sockaddr_in6	*sin6;
1929 #endif
1930 	struct noise_remote		*remote;
1931 	struct wg_pkt_data		*data;
1932 	struct wg_packet		*pkt;
1933 	struct wg_peer			*peer;
1934 	struct wg_softc			*sc = _sc;
1935 	struct mbuf			*defragged;
1936 
1937 	defragged = m_defrag(m, M_NOWAIT);
1938 	if (defragged)
1939 		m = defragged;
1940 	m = m_unshare(m, M_NOWAIT);
1941 	if (!m) {
1942 		if_inc_counter(sc->sc_ifp, IFCOUNTER_IQDROPS, 1);
1943 		return true;
1944 	}
1945 
1946 	/* Caller provided us with `sa`, no need for this header. */
1947 	m_adj(m, offset + sizeof(struct udphdr));
1948 
1949 	/* Pullup enough to read packet type */
1950 	if ((m = m_pullup(m, sizeof(uint32_t))) == NULL) {
1951 		if_inc_counter(sc->sc_ifp, IFCOUNTER_IQDROPS, 1);
1952 		return true;
1953 	}
1954 
1955 	if ((pkt = wg_packet_alloc(m)) == NULL) {
1956 		if_inc_counter(sc->sc_ifp, IFCOUNTER_IQDROPS, 1);
1957 		m_freem(m);
1958 		return true;
1959 	}
1960 
1961 	/* Save send/recv address and port for later. */
1962 	switch (sa->sa_family) {
1963 #ifdef INET
1964 	case AF_INET:
1965 		sin = (const struct sockaddr_in *)sa;
1966 		pkt->p_endpoint.e_remote.r_sin = sin[0];
1967 		pkt->p_endpoint.e_local.l_in = sin[1].sin_addr;
1968 		break;
1969 #endif
1970 #ifdef INET6
1971 	case AF_INET6:
1972 		sin6 = (const struct sockaddr_in6 *)sa;
1973 		pkt->p_endpoint.e_remote.r_sin6 = sin6[0];
1974 		pkt->p_endpoint.e_local.l_in6 = sin6[1].sin6_addr;
1975 		break;
1976 #endif
1977 	default:
1978 		goto error;
1979 	}
1980 
1981 	if ((m->m_pkthdr.len == sizeof(struct wg_pkt_initiation) &&
1982 		*mtod(m, uint32_t *) == WG_PKT_INITIATION) ||
1983 	    (m->m_pkthdr.len == sizeof(struct wg_pkt_response) &&
1984 		*mtod(m, uint32_t *) == WG_PKT_RESPONSE) ||
1985 	    (m->m_pkthdr.len == sizeof(struct wg_pkt_cookie) &&
1986 		*mtod(m, uint32_t *) == WG_PKT_COOKIE)) {
1987 
1988 		if (wg_queue_enqueue_handshake(&sc->sc_handshake_queue, pkt) != 0) {
1989 			if_inc_counter(sc->sc_ifp, IFCOUNTER_IQDROPS, 1);
1990 			DPRINTF(sc, "Dropping handshake packet\n");
1991 		}
1992 		GROUPTASK_ENQUEUE(&sc->sc_handshake);
1993 	} else if (m->m_pkthdr.len >= sizeof(struct wg_pkt_data) +
1994 	    NOISE_AUTHTAG_LEN && *mtod(m, uint32_t *) == WG_PKT_DATA) {
1995 
1996 		/* Pullup whole header to read r_idx below. */
1997 		if ((pkt->p_mbuf = m_pullup(m, sizeof(struct wg_pkt_data))) == NULL)
1998 			goto error;
1999 
2000 		data = mtod(pkt->p_mbuf, struct wg_pkt_data *);
2001 		if ((pkt->p_keypair = noise_keypair_lookup(sc->sc_local, data->r_idx)) == NULL)
2002 			goto error;
2003 
2004 		remote = noise_keypair_remote(pkt->p_keypair);
2005 		peer = noise_remote_arg(remote);
2006 		if (wg_queue_both(&sc->sc_decrypt_parallel, &peer->p_decrypt_serial, pkt) != 0)
2007 			if_inc_counter(sc->sc_ifp, IFCOUNTER_IQDROPS, 1);
2008 		wg_decrypt_dispatch(sc);
2009 		noise_remote_put(remote);
2010 	} else {
2011 		goto error;
2012 	}
2013 	return true;
2014 error:
2015 	if_inc_counter(sc->sc_ifp, IFCOUNTER_IERRORS, 1);
2016 	wg_packet_free(pkt);
2017 	return true;
2018 }
2019 
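/*
 * wg_peer_send_staged: move the peer's staged packets onto the encryption
 * queues under the current keypair.  If there is no valid keypair, or the
 * nonce space is exhausted, the packets are put back on the staged queue and
 * a new handshake is requested.
 */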
2020 static void
2021 wg_peer_send_staged(struct wg_peer *peer)
2022 {
2023 	struct wg_packet_list	 list;
2024 	struct noise_keypair	*keypair;
2025 	struct wg_packet	*pkt, *tpkt;
2026 	struct wg_softc		*sc = peer->p_sc;
2027 
2028 	wg_queue_delist_staged(&peer->p_stage_queue, &list);
2029 
2030 	if (STAILQ_EMPTY(&list))
2031 		return;
2032 
2033 	if ((keypair = noise_keypair_current(peer->p_remote)) == NULL)
2034 		goto error;
2035 
2036 	STAILQ_FOREACH(pkt, &list, p_parallel) {
2037 		if (noise_keypair_nonce_next(keypair, &pkt->p_nonce) != 0)
2038 			goto error_keypair;
2039 	}
2040 	STAILQ_FOREACH_SAFE(pkt, &list, p_parallel, tpkt) {
2041 		pkt->p_keypair = noise_keypair_ref(keypair);
2042 		if (wg_queue_both(&sc->sc_encrypt_parallel, &peer->p_encrypt_serial, pkt) != 0)
2043 			if_inc_counter(sc->sc_ifp, IFCOUNTER_OQDROPS, 1);
2044 	}
2045 	wg_encrypt_dispatch(sc);
2046 	noise_keypair_put(keypair);
2047 	return;
2048 
2049 error_keypair:
2050 	noise_keypair_put(keypair);
2051 error:
2052 	wg_queue_enlist_staged(&peer->p_stage_queue, &list);
2053 	wg_timers_event_want_initiation(peer);
2054 }
2055 
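/*
 * xmit_err: count an output error and, for IPv4/IPv6, turn the offending
 * mbuf into an ICMP/ICMPv6 unreachable reply (icmp_error() consumes the
 * mbuf); otherwise free the mbuf or its wrapping packet.
 */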
2056 static inline void
2057 xmit_err(if_t ifp, struct mbuf *m, struct wg_packet *pkt, sa_family_t af)
2058 {
2059 	if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
2060 	switch (af) {
2061 #ifdef INET
2062 	case AF_INET:
2063 		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0);
2064 		if (pkt)
2065 			pkt->p_mbuf = NULL;
2066 		m = NULL;
2067 		break;
2068 #endif
2069 #ifdef INET6
2070 	case AF_INET6:
2071 		icmp6_error(m, ICMP6_DST_UNREACH, 0, 0);
2072 		if (pkt)
2073 			pkt->p_mbuf = NULL;
2074 		m = NULL;
2075 		break;
2076 #endif
2077 	}
2078 	if (pkt)
2079 		wg_packet_free(pkt);
2080 	else if (m)
2081 		m_freem(m);
2082 }
2083 
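/*
 * wg_xmit: common transmit path for wg_transmit() and wg_output().  Looks up
 * the peer owning the destination address in the allowed-IPs table, guards
 * against tunnel nesting, and stages the packet for encryption and
 * transmission to the peer's configured endpoint.
 */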
2084 static int
2085 wg_xmit(if_t ifp, struct mbuf *m, sa_family_t af, uint32_t mtu)
2086 {
2087 	struct wg_packet	*pkt = NULL;
2088 	struct wg_softc		*sc = if_getsoftc(ifp);
2089 	struct wg_peer		*peer;
2090 	int			 rc = 0;
2091 	sa_family_t		 peer_af;
2092 
2093 	/* Work around lifetime issue in the IPv6 MLD code. */
2094 	if (__predict_false((if_getflags(ifp) & IFF_DYING) || !sc)) {
2095 		rc = ENXIO;
2096 		goto err_xmit;
2097 	}
2098 
2099 	if ((pkt = wg_packet_alloc(m)) == NULL) {
2100 		rc = ENOBUFS;
2101 		goto err_xmit;
2102 	}
2103 	pkt->p_mtu = mtu;
2104 	pkt->p_af = af;
2105 
2106 	if (af == AF_INET) {
2107 		peer = wg_aip_lookup(sc, AF_INET, &mtod(m, struct ip *)->ip_dst);
2108 	} else if (af == AF_INET6) {
2109 		peer = wg_aip_lookup(sc, AF_INET6, &mtod(m, struct ip6_hdr *)->ip6_dst);
2110 	} else {
2111 		rc = EAFNOSUPPORT;
2112 		goto err_xmit;
2113 	}
2114 
2115 	BPF_MTAP2_AF(ifp, m, pkt->p_af);
2116 
2117 	if (__predict_false(peer == NULL)) {
2118 		rc = ENOKEY;
2119 		goto err_xmit;
2120 	}
2121 
2122 	if (__predict_false(if_tunnel_check_nesting(ifp, m, MTAG_WGLOOP, MAX_LOOPS))) {
2123 		DPRINTF(sc, "Packet looped\n");
2124 		rc = ELOOP;
2125 		goto err_peer;
2126 	}
2127 
2128 	peer_af = peer->p_endpoint.e_remote.r_sa.sa_family;
2129 	if (__predict_false(peer_af != AF_INET && peer_af != AF_INET6)) {
2130 		DPRINTF(sc, "No valid endpoint has been configured or "
2131 			    "discovered for peer %" PRIu64 "\n", peer->p_id);
2132 		rc = EHOSTUNREACH;
2133 		goto err_peer;
2134 	}
2135 
2136 	wg_queue_push_staged(&peer->p_stage_queue, pkt);
2137 	wg_peer_send_staged(peer);
2138 	noise_remote_put(peer->p_remote);
2139 	return (0);
2140 
2141 err_peer:
2142 	noise_remote_put(peer->p_remote);
2143 err_xmit:
2144 	xmit_err(ifp, m, pkt, af);
2145 	return (rc);
2146 }
2147 
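/*
 * determine_af_and_pullup: classify an outbound mbuf as IPv4 or IPv6 from
 * the IP version nibble, pulling up enough of the header to inspect it.
 */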
2148 static inline int
2149 determine_af_and_pullup(struct mbuf **m, sa_family_t *af)
2150 {
2151 	u_char ipv;
2152 	if ((*m)->m_pkthdr.len >= sizeof(struct ip6_hdr))
2153 		*m = m_pullup(*m, sizeof(struct ip6_hdr));
2154 	else if ((*m)->m_pkthdr.len >= sizeof(struct ip))
2155 		*m = m_pullup(*m, sizeof(struct ip));
2156 	else
2157 		return (EAFNOSUPPORT);
2158 	if (*m == NULL)
2159 		return (ENOBUFS);
2160 	ipv = mtod(*m, struct ip *)->ip_v;
2161 	if (ipv == 4)
2162 		*af = AF_INET;
2163 	else if (ipv == 6 && (*m)->m_pkthdr.len >= sizeof(struct ip6_hdr))
2164 		*af = AF_INET6;
2165 	else
2166 		return (EAFNOSUPPORT);
2167 	return (0);
2168 }
2169 
2170 static int
2171 wg_transmit(if_t ifp, struct mbuf *m)
2172 {
2173 	sa_family_t af;
2174 	int ret;
2175 	struct mbuf *defragged;
2176 
2177 	defragged = m_defrag(m, M_NOWAIT);
2178 	if (defragged)
2179 		m = defragged;
2180 	m = m_unshare(m, M_NOWAIT);
2181 	if (!m) {
2182 		xmit_err(ifp, m, NULL, AF_UNSPEC);
2183 		return (ENOBUFS);
2184 	}
2185 
2186 	ret = determine_af_and_pullup(&m, &af);
2187 	if (ret) {
2188 		xmit_err(ifp, m, NULL, AF_UNSPEC);
2189 		return (ret);
2190 	}
2191 	return (wg_xmit(ifp, m, af, if_getmtu(ifp)));
2192 }
2193 
2194 static int
2195 wg_output(if_t ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro)
2196 {
2197 	sa_family_t parsed_af;
2198 	uint32_t af, mtu;
2199 	int ret;
2200 	struct mbuf *defragged;
2201 
2202 	if (dst->sa_family == AF_UNSPEC)
2203 		memcpy(&af, dst->sa_data, sizeof(af));
2204 	else
2205 		af = dst->sa_family;
2206 	if (af == AF_UNSPEC) {
2207 		xmit_err(ifp, m, NULL, af);
2208 		return (EAFNOSUPPORT);
2209 	}
2210 
2211 	defragged = m_defrag(m, M_NOWAIT);
2212 	if (defragged)
2213 		m = defragged;
2214 	m = m_unshare(m, M_NOWAIT);
2215 	if (!m) {
2216 		xmit_err(ifp, m, NULL, AF_UNSPEC);
2217 		return (ENOBUFS);
2218 	}
2219 
2220 	ret = determine_af_and_pullup(&m, &parsed_af);
2221 	if (ret) {
2222 		xmit_err(ifp, m, NULL, AF_UNSPEC);
2223 		return (ret);
2224 	}
2225 	if (parsed_af != af) {
2226 		xmit_err(ifp, m, NULL, AF_UNSPEC);
2227 		return (EAFNOSUPPORT);
2228 	}
2229 	mtu = (ro != NULL && ro->ro_mtu > 0) ? ro->ro_mtu : if_getmtu(ifp);
2230 	return (wg_xmit(ifp, m, parsed_af, mtu));
2231 }
2232 
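/*
 * wg_peer_add: apply a single "peer" nvlist from SIOCSWG.  The peer is
 * looked up (or created) by its public key; endpoint, preshared key,
 * persistent keepalive and allowed-IP entries are then applied.  A "remove"
 * flag destroys the peer instead.
 */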
2233 static int
2234 wg_peer_add(struct wg_softc *sc, const nvlist_t *nvl)
2235 {
2236 	uint8_t			 public[WG_KEY_SIZE];
2237 	const void *pub_key, *preshared_key = NULL;
2238 	const struct sockaddr *endpoint;
2239 	int err;
2240 	size_t size;
2241 	struct noise_remote *remote;
2242 	struct wg_peer *peer = NULL;
2243 	bool need_insert = false;
2244 
2245 	sx_assert(&sc->sc_lock, SX_XLOCKED);
2246 
2247 	if (!nvlist_exists_binary(nvl, "public-key")) {
2248 		return (EINVAL);
2249 	}
2250 	pub_key = nvlist_get_binary(nvl, "public-key", &size);
2251 	if (size != WG_KEY_SIZE) {
2252 		return (EINVAL);
2253 	}
2254 	if (noise_local_keys(sc->sc_local, public, NULL) == 0 &&
2255 	    bcmp(public, pub_key, WG_KEY_SIZE) == 0) {
2256 		return (0); /* Silently ignored; not actually a failure. */
2257 	}
2258 	if ((remote = noise_remote_lookup(sc->sc_local, pub_key)) != NULL)
2259 		peer = noise_remote_arg(remote);
2260 	if (nvlist_exists_bool(nvl, "remove") &&
2261 		nvlist_get_bool(nvl, "remove")) {
2262 		if (remote != NULL) {
2263 			wg_peer_destroy(peer);
2264 			noise_remote_put(remote);
2265 		}
2266 		return (0);
2267 	}
2268 	if (nvlist_exists_bool(nvl, "replace-allowedips") &&
2269 		nvlist_get_bool(nvl, "replace-allowedips") &&
2270 	    peer != NULL) {
2271 
2272 		wg_aip_remove_all(sc, peer);
2273 	}
2274 	if (peer == NULL) {
2275 		peer = wg_peer_alloc(sc, pub_key);
2276 		need_insert = true;
2277 	}
2278 	if (nvlist_exists_binary(nvl, "endpoint")) {
2279 		endpoint = nvlist_get_binary(nvl, "endpoint", &size);
2280 		if (size > sizeof(peer->p_endpoint.e_remote)) {
2281 			err = EINVAL;
2282 			goto out;
2283 		}
2284 		memcpy(&peer->p_endpoint.e_remote, endpoint, size);
2285 	}
2286 	if (nvlist_exists_binary(nvl, "preshared-key")) {
2287 		preshared_key = nvlist_get_binary(nvl, "preshared-key", &size);
2288 		if (size != WG_KEY_SIZE) {
2289 			err = EINVAL;
2290 			goto out;
2291 		}
2292 		noise_remote_set_psk(peer->p_remote, preshared_key);
2293 	}
2294 	if (nvlist_exists_number(nvl, "persistent-keepalive-interval")) {
2295 		uint64_t pki = nvlist_get_number(nvl, "persistent-keepalive-interval");
2296 		if (pki > UINT16_MAX) {
2297 			err = EINVAL;
2298 			goto out;
2299 		}
2300 		wg_timers_set_persistent_keepalive(peer, pki);
2301 	}
2302 	if (nvlist_exists_nvlist_array(nvl, "allowed-ips")) {
2303 		const void *addr;
2304 		uint64_t cidr;
2305 		const nvlist_t * const * aipl;
2306 		size_t allowedip_count;
2307 
2308 		aipl = nvlist_get_nvlist_array(nvl, "allowed-ips", &allowedip_count);
2309 		for (size_t idx = 0; idx < allowedip_count; idx++) {
2310 			if (!nvlist_exists_number(aipl[idx], "cidr"))
2311 				continue;
2312 			cidr = nvlist_get_number(aipl[idx], "cidr");
2313 			if (nvlist_exists_binary(aipl[idx], "ipv4")) {
2314 				addr = nvlist_get_binary(aipl[idx], "ipv4", &size);
2315 				if (addr == NULL || cidr > 32 || size != sizeof(struct in_addr)) {
2316 					err = EINVAL;
2317 					goto out;
2318 				}
2319 				if ((err = wg_aip_add(sc, peer, AF_INET, addr, cidr)) != 0)
2320 					goto out;
2321 			} else if (nvlist_exists_binary(aipl[idx], "ipv6")) {
2322 				addr = nvlist_get_binary(aipl[idx], "ipv6", &size);
2323 				if (addr == NULL || cidr > 128 || size != sizeof(struct in6_addr)) {
2324 					err = EINVAL;
2325 					goto out;
2326 				}
2327 				if ((err = wg_aip_add(sc, peer, AF_INET6, addr, cidr)) != 0)
2328 					goto out;
2329 			} else {
2330 				continue;
2331 			}
2332 		}
2333 	}
2334 	if (need_insert) {
2335 		if ((err = noise_remote_enable(peer->p_remote)) != 0)
2336 			goto out;
2337 		TAILQ_INSERT_TAIL(&sc->sc_peers, peer, p_entry);
2338 		sc->sc_peers_num++;
2339 		if (if_getlinkstate(sc->sc_ifp) == LINK_STATE_UP)
2340 			wg_timers_enable(peer);
2341 	}
2342 	if (remote != NULL)
2343 		noise_remote_put(remote);
2344 	return (0);
2345 out:
2346 	if (need_insert) /* If we fail, only destroy if it was new. */
2347 		wg_peer_destroy(peer);
2348 	if (remote != NULL)
2349 		noise_remote_put(remote);
2350 	return (err);
2351 }
2352 
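/*
 * wgc_set: SIOCSWG handler.  Copies in and unpacks the nvlist supplied by
 * userland, then applies interface-wide settings (listen port, private key,
 * user cookie) and each peer entry while holding the softc lock exclusively.
 */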
2353 static int
2354 wgc_set(struct wg_softc *sc, struct wg_data_io *wgd)
2355 {
2356 	uint8_t public[WG_KEY_SIZE], private[WG_KEY_SIZE];
2357 	if_t ifp;
2358 	void *nvlpacked;
2359 	nvlist_t *nvl;
2360 	ssize_t size;
2361 	int err;
2362 
2363 	ifp = sc->sc_ifp;
2364 	if (wgd->wgd_size == 0 || wgd->wgd_data == NULL)
2365 		return (EFAULT);
2366 
2367 	/* Can nvlists be streamed in? It's not nice to impose arbitrary limits
2368 	 * like that, but there needs to be _some_ limitation. */
2369 	if (wgd->wgd_size >= UINT32_MAX / 2)
2370 		return (E2BIG);
2371 
2372 	nvlpacked = malloc(wgd->wgd_size, M_TEMP, M_WAITOK | M_ZERO);
2373 
2374 	err = copyin(wgd->wgd_data, nvlpacked, wgd->wgd_size);
2375 	if (err)
2376 		goto out;
2377 	nvl = nvlist_unpack(nvlpacked, wgd->wgd_size, 0);
2378 	if (nvl == NULL) {
2379 		err = EBADMSG;
2380 		goto out;
2381 	}
2382 	sx_xlock(&sc->sc_lock);
2383 	if (nvlist_exists_bool(nvl, "replace-peers") &&
2384 		nvlist_get_bool(nvl, "replace-peers"))
2385 		wg_peer_destroy_all(sc);
2386 	if (nvlist_exists_number(nvl, "listen-port")) {
2387 		uint64_t new_port = nvlist_get_number(nvl, "listen-port");
2388 		if (new_port > UINT16_MAX) {
2389 			err = EINVAL;
2390 			goto out_locked;
2391 		}
2392 		if (new_port != sc->sc_socket.so_port) {
2393 			if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0) {
2394 				if ((err = wg_socket_init(sc, new_port)) != 0)
2395 					goto out_locked;
2396 			} else
2397 				sc->sc_socket.so_port = new_port;
2398 		}
2399 	}
2400 	if (nvlist_exists_binary(nvl, "private-key")) {
2401 		const void *key = nvlist_get_binary(nvl, "private-key", &size);
2402 		if (size != WG_KEY_SIZE) {
2403 			err = EINVAL;
2404 			goto out_locked;
2405 		}
2406 
2407 		if (noise_local_keys(sc->sc_local, NULL, private) != 0 ||
2408 		    timingsafe_bcmp(private, key, WG_KEY_SIZE) != 0) {
2409 			struct wg_peer *peer;
2410 
2411 			if (curve25519_generate_public(public, key)) {
2412 				/* Peer conflict: remove conflicting peer. */
2413 				struct noise_remote *remote;
2414 				if ((remote = noise_remote_lookup(sc->sc_local,
2415 				    public)) != NULL) {
2416 					peer = noise_remote_arg(remote);
2417 					wg_peer_destroy(peer);
2418 					noise_remote_put(remote);
2419 				}
2420 			}
2421 
2422 			/*
2423 			 * Set the private key and invalidate all existing
2424 			 * handshakes.
2425 			 */
2426 			/* Note: we might be removing the private key. */
2427 			noise_local_private(sc->sc_local, key);
2428 			if (noise_local_keys(sc->sc_local, NULL, NULL) == 0)
2429 				cookie_checker_update(&sc->sc_cookie, public);
2430 			else
2431 				cookie_checker_update(&sc->sc_cookie, NULL);
2432 		}
2433 	}
2434 	if (nvlist_exists_number(nvl, "user-cookie")) {
2435 		uint64_t user_cookie = nvlist_get_number(nvl, "user-cookie");
2436 		if (user_cookie > UINT32_MAX) {
2437 			err = EINVAL;
2438 			goto out_locked;
2439 		}
2440 		err = wg_socket_set_cookie(sc, user_cookie);
2441 		if (err)
2442 			goto out_locked;
2443 	}
2444 	if (nvlist_exists_nvlist_array(nvl, "peers")) {
2445 		size_t peercount;
2446 		const nvlist_t * const*nvl_peers;
2447 
2448 		nvl_peers = nvlist_get_nvlist_array(nvl, "peers", &peercount);
2449 		for (size_t i = 0; i < peercount; i++) {
2450 			err = wg_peer_add(sc, nvl_peers[i]);
2451 			if (err != 0)
2452 				goto out_locked;
2453 		}
2454 	}
2455 
2456 out_locked:
2457 	sx_xunlock(&sc->sc_lock);
2458 	nvlist_destroy(nvl);
2459 out:
2460 	zfree(nvlpacked, M_TEMP);
2461 	return (err);
2462 }
2463 
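/*
 * wgc_get: SIOCGWG handler.  Builds an nvlist describing the interface and
 * its peers (secret keys are included only for privileged callers), packs it
 * and copies it out.  A caller passing a wgd_size of zero just learns the
 * required buffer size.
 */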
2464 static int
2465 wgc_get(struct wg_softc *sc, struct wg_data_io *wgd)
2466 {
2467 	uint8_t public_key[WG_KEY_SIZE] = { 0 };
2468 	uint8_t private_key[WG_KEY_SIZE] = { 0 };
2469 	uint8_t preshared_key[NOISE_SYMMETRIC_KEY_LEN] = { 0 };
2470 	nvlist_t *nvl, *nvl_peer, *nvl_aip, **nvl_peers, **nvl_aips;
2471 	size_t size, peer_count, aip_count, i, j;
2472 	struct wg_timespec64 ts64;
2473 	struct wg_peer *peer;
2474 	struct wg_aip *aip;
2475 	void *packed;
2476 	int err = 0;
2477 
2478 	nvl = nvlist_create(0);
2479 	if (!nvl)
2480 		return (ENOMEM);
2481 
2482 	sx_slock(&sc->sc_lock);
2483 
2484 	if (sc->sc_socket.so_port != 0)
2485 		nvlist_add_number(nvl, "listen-port", sc->sc_socket.so_port);
2486 	if (sc->sc_socket.so_user_cookie != 0)
2487 		nvlist_add_number(nvl, "user-cookie", sc->sc_socket.so_user_cookie);
2488 	if (noise_local_keys(sc->sc_local, public_key, private_key) == 0) {
2489 		nvlist_add_binary(nvl, "public-key", public_key, WG_KEY_SIZE);
2490 		if (wgc_privileged(sc))
2491 			nvlist_add_binary(nvl, "private-key", private_key, WG_KEY_SIZE);
2492 		explicit_bzero(private_key, sizeof(private_key));
2493 	}
2494 	peer_count = sc->sc_peers_num;
2495 	if (peer_count) {
2496 		nvl_peers = mallocarray(peer_count, sizeof(void *), M_NVLIST, M_WAITOK | M_ZERO);
2497 		i = 0;
2498 		TAILQ_FOREACH(peer, &sc->sc_peers, p_entry) {
2499 			if (i >= peer_count)
2500 				panic("peers changed from under us");
2501 
2502 			nvl_peers[i++] = nvl_peer = nvlist_create(0);
2503 			if (!nvl_peer) {
2504 				err = ENOMEM;
2505 				goto err_peer;
2506 			}
2507 
2508 			(void)noise_remote_keys(peer->p_remote, public_key, preshared_key);
2509 			nvlist_add_binary(nvl_peer, "public-key", public_key, sizeof(public_key));
2510 			if (wgc_privileged(sc))
2511 				nvlist_add_binary(nvl_peer, "preshared-key", preshared_key, sizeof(preshared_key));
2512 			explicit_bzero(preshared_key, sizeof(preshared_key));
2513 			if (peer->p_endpoint.e_remote.r_sa.sa_family == AF_INET)
2514 				nvlist_add_binary(nvl_peer, "endpoint", &peer->p_endpoint.e_remote, sizeof(struct sockaddr_in));
2515 			else if (peer->p_endpoint.e_remote.r_sa.sa_family == AF_INET6)
2516 				nvlist_add_binary(nvl_peer, "endpoint", &peer->p_endpoint.e_remote, sizeof(struct sockaddr_in6));
2517 			wg_timers_get_last_handshake(peer, &ts64);
2518 			nvlist_add_binary(nvl_peer, "last-handshake-time", &ts64, sizeof(ts64));
2519 			nvlist_add_number(nvl_peer, "persistent-keepalive-interval", peer->p_persistent_keepalive_interval);
2520 			nvlist_add_number(nvl_peer, "rx-bytes", counter_u64_fetch(peer->p_rx_bytes));
2521 			nvlist_add_number(nvl_peer, "tx-bytes", counter_u64_fetch(peer->p_tx_bytes));
2522 
2523 			aip_count = peer->p_aips_num;
2524 			if (aip_count) {
2525 				nvl_aips = mallocarray(aip_count, sizeof(void *), M_NVLIST, M_WAITOK | M_ZERO);
2526 				j = 0;
2527 				LIST_FOREACH(aip, &peer->p_aips, a_entry) {
2528 					if (j >= aip_count)
2529 						panic("aips changed from under us");
2530 
2531 					nvl_aips[j++] = nvl_aip = nvlist_create(0);
2532 					if (!nvl_aip) {
2533 						err = ENOMEM;
2534 						goto err_aip;
2535 					}
2536 					if (aip->a_af == AF_INET) {
2537 						nvlist_add_binary(nvl_aip, "ipv4", &aip->a_addr.in, sizeof(aip->a_addr.in));
2538 						nvlist_add_number(nvl_aip, "cidr", bitcount32(aip->a_mask.ip));
2539 					}
2540 #ifdef INET6
2541 					else if (aip->a_af == AF_INET6) {
2542 						nvlist_add_binary(nvl_aip, "ipv6", &aip->a_addr.in6, sizeof(aip->a_addr.in6));
2543 						nvlist_add_number(nvl_aip, "cidr", in6_mask2len(&aip->a_mask.in6, NULL));
2544 					}
2545 #endif
2546 				}
2547 				nvlist_add_nvlist_array(nvl_peer, "allowed-ips", (const nvlist_t *const *)nvl_aips, aip_count);
2548 			err_aip:
2549 				for (j = 0; j < aip_count; ++j)
2550 					nvlist_destroy(nvl_aips[j]);
2551 				free(nvl_aips, M_NVLIST);
2552 				if (err)
2553 					goto err_peer;
2554 			}
2555 		}
2556 		nvlist_add_nvlist_array(nvl, "peers", (const nvlist_t * const *)nvl_peers, peer_count);
2557 	err_peer:
2558 		for (i = 0; i < peer_count; ++i)
2559 			nvlist_destroy(nvl_peers[i]);
2560 		free(nvl_peers, M_NVLIST);
2561 		if (err) {
2562 			sx_sunlock(&sc->sc_lock);
2563 			goto err;
2564 		}
2565 	}
2566 	sx_sunlock(&sc->sc_lock);
2567 	packed = nvlist_pack(nvl, &size);
2568 	if (!packed) {
2569 		err = ENOMEM;
2570 		goto err;
2571 	}
2572 	if (!wgd->wgd_size) {
2573 		wgd->wgd_size = size;
2574 		goto out;
2575 	}
2576 	if (wgd->wgd_size < size) {
2577 		err = ENOSPC;
2578 		goto out;
2579 	}
2580 	err = copyout(packed, wgd->wgd_data, size);
2581 	wgd->wgd_size = size;
2582 
2583 out:
2584 	zfree(packed, M_NVLIST);
2585 err:
2586 	nvlist_destroy(nvl);
2587 	return (err);
2588 }
2589 
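/*
 * wg_ioctl: interface ioctl entry point.  SIOCSWG and SIOCSTUNFIB require
 * PRIV_NET_WG; SIOCGWG is open to all callers, with privileged fields
 * filtered in wgc_get().
 */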
2590 static int
2591 wg_ioctl(if_t ifp, u_long cmd, caddr_t data)
2592 {
2593 	struct wg_data_io *wgd = (struct wg_data_io *)data;
2594 	struct ifreq *ifr = (struct ifreq *)data;
2595 	struct wg_softc *sc;
2596 	int ret = 0;
2597 
2598 	sx_slock(&wg_sx);
2599 	sc = if_getsoftc(ifp);
2600 	if (!sc) {
2601 		ret = ENXIO;
2602 		goto out;
2603 	}
2604 
2605 	switch (cmd) {
2606 	case SIOCSWG:
2607 		ret = priv_check(curthread, PRIV_NET_WG);
2608 		if (ret == 0)
2609 			ret = wgc_set(sc, wgd);
2610 		break;
2611 	case SIOCGWG:
2612 		ret = wgc_get(sc, wgd);
2613 		break;
2614 	/* Interface IOCTLs */
2615 	case SIOCSIFADDR:
2616 		/*
2617 		 * This differs from *BSD norms, but is more uniform with how
2618 		 * WireGuard behaves elsewhere.
2619 		 */
2620 		break;
2621 	case SIOCSIFFLAGS:
2622 		if (if_getflags(ifp) & IFF_UP)
2623 			ret = wg_up(sc);
2624 		else
2625 			wg_down(sc);
2626 		break;
2627 	case SIOCSIFMTU:
2628 		if (ifr->ifr_mtu <= 0 || ifr->ifr_mtu > MAX_MTU)
2629 			ret = EINVAL;
2630 		else
2631 			if_setmtu(ifp, ifr->ifr_mtu);
2632 		break;
2633 	case SIOCADDMULTI:
2634 	case SIOCDELMULTI:
2635 		break;
2636 	case SIOCGTUNFIB:
2637 		ifr->ifr_fib = sc->sc_socket.so_fibnum;
2638 		break;
2639 	case SIOCSTUNFIB:
2640 		ret = priv_check(curthread, PRIV_NET_WG);
2641 		if (ret)
2642 			break;
2643 		ret = priv_check(curthread, PRIV_NET_SETIFFIB);
2644 		if (ret)
2645 			break;
2646 		sx_xlock(&sc->sc_lock);
2647 		ret = wg_socket_set_fibnum(sc, ifr->ifr_fib);
2648 		sx_xunlock(&sc->sc_lock);
2649 		break;
2650 	default:
2651 		ret = ENOTTY;
2652 	}
2653 
2654 out:
2655 	sx_sunlock(&wg_sx);
2656 	return (ret);
2657 }
2658 
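/*
 * wg_up: bind the tunnel socket on the configured port, enable the per-peer
 * timers and mark the link up.  Returns EBUSY while the owning jail is being
 * torn down (WGF_DYING).
 */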
2659 static int
2660 wg_up(struct wg_softc *sc)
2661 {
2662 	if_t ifp = sc->sc_ifp;
2663 	struct wg_peer *peer;
2664 	int rc = EBUSY;
2665 
2666 	sx_xlock(&sc->sc_lock);
2667 	/* Jail's being removed, no more wg_up(). */
2668 	if ((sc->sc_flags & WGF_DYING) != 0)
2669 		goto out;
2670 
2671 	/* Silent success if we're already running. */
2672 	rc = 0;
2673 	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
2674 		goto out;
2675 	if_setdrvflagbits(ifp, IFF_DRV_RUNNING, 0);
2676 
2677 	rc = wg_socket_init(sc, sc->sc_socket.so_port);
2678 	if (rc == 0) {
2679 		TAILQ_FOREACH(peer, &sc->sc_peers, p_entry)
2680 			wg_timers_enable(peer);
2681 		if_link_state_change(sc->sc_ifp, LINK_STATE_UP);
2682 	} else {
2683 		if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
2684 		DPRINTF(sc, "Unable to initialize sockets: %d\n", rc);
2685 	}
2686 out:
2687 	sx_xunlock(&sc->sc_lock);
2688 	return (rc);
2689 }
2690 
2691 static void
2692 wg_down(struct wg_softc *sc)
2693 {
2694 	if_t ifp = sc->sc_ifp;
2695 	struct wg_peer *peer;
2696 
2697 	sx_xlock(&sc->sc_lock);
2698 	if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING)) {
2699 		sx_xunlock(&sc->sc_lock);
2700 		return;
2701 	}
2702 	if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
2703 
2704 	TAILQ_FOREACH(peer, &sc->sc_peers, p_entry) {
2705 		wg_queue_purge(&peer->p_stage_queue);
2706 		wg_timers_disable(peer);
2707 	}
2708 
2709 	wg_queue_purge(&sc->sc_handshake_queue);
2710 
2711 	TAILQ_FOREACH(peer, &sc->sc_peers, p_entry) {
2712 		noise_remote_handshake_clear(peer->p_remote);
2713 		noise_remote_keypairs_clear(peer->p_remote);
2714 	}
2715 
2716 	if_link_state_change(sc->sc_ifp, LINK_STATE_DOWN);
2717 	wg_socket_uninit(sc);
2718 
2719 	sx_xunlock(&sc->sc_lock);
2720 }
2721 
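/*
 * wg_clone_create: allocate a new if_wg softc: noise local state, allowed-IP
 * radix heads, per-CPU encrypt/decrypt grouptasks and the handshake task,
 * then configure and attach the ifnet.
 */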
2722 static int
2723 wg_clone_create(struct if_clone *ifc, char *name, size_t len,
2724     struct ifc_data *ifd, struct ifnet **ifpp)
2725 {
2726 	struct wg_softc *sc;
2727 	if_t ifp;
2728 
2729 	sc = malloc(sizeof(*sc), M_WG, M_WAITOK | M_ZERO);
2730 
2731 	sc->sc_local = noise_local_alloc(sc);
2732 
2733 	sc->sc_encrypt = mallocarray(sizeof(struct grouptask), mp_ncpus, M_WG, M_WAITOK | M_ZERO);
2734 
2735 	sc->sc_decrypt = mallocarray(sizeof(struct grouptask), mp_ncpus, M_WG, M_WAITOK | M_ZERO);
2736 
2737 	if (!rn_inithead((void **)&sc->sc_aip4, offsetof(struct aip_addr, in) * NBBY))
2738 		goto free_decrypt;
2739 
2740 	if (!rn_inithead((void **)&sc->sc_aip6, offsetof(struct aip_addr, in6) * NBBY))
2741 		goto free_aip4;
2742 
2743 	atomic_add_int(&clone_count, 1);
2744 	ifp = sc->sc_ifp = if_alloc(IFT_WIREGUARD);
2745 
2746 	sc->sc_ucred = crhold(curthread->td_ucred);
2747 	sc->sc_socket.so_fibnum = curthread->td_proc->p_fibnum;
2748 	sc->sc_socket.so_port = 0;
2749 
2750 	TAILQ_INIT(&sc->sc_peers);
2751 	sc->sc_peers_num = 0;
2752 
2753 	cookie_checker_init(&sc->sc_cookie);
2754 
2755 	RADIX_NODE_HEAD_LOCK_INIT(sc->sc_aip4);
2756 	RADIX_NODE_HEAD_LOCK_INIT(sc->sc_aip6);
2757 
2758 	GROUPTASK_INIT(&sc->sc_handshake, 0, (gtask_fn_t *)wg_softc_handshake_receive, sc);
2759 	taskqgroup_attach(qgroup_wg_tqg, &sc->sc_handshake, sc, NULL, NULL, "wg tx initiation");
2760 	wg_queue_init(&sc->sc_handshake_queue, "hsq");
2761 
2762 	for (int i = 0; i < mp_ncpus; i++) {
2763 		GROUPTASK_INIT(&sc->sc_encrypt[i], 0,
2764 		     (gtask_fn_t *)wg_softc_encrypt, sc);
2765 		taskqgroup_attach_cpu(qgroup_wg_tqg, &sc->sc_encrypt[i], sc, i, NULL, NULL, "wg encrypt");
2766 		GROUPTASK_INIT(&sc->sc_decrypt[i], 0,
2767 		    (gtask_fn_t *)wg_softc_decrypt, sc);
2768 		taskqgroup_attach_cpu(qgroup_wg_tqg, &sc->sc_decrypt[i], sc, i, NULL, NULL, "wg decrypt");
2769 	}
2770 
2771 	wg_queue_init(&sc->sc_encrypt_parallel, "encp");
2772 	wg_queue_init(&sc->sc_decrypt_parallel, "decp");
2773 
2774 	sx_init(&sc->sc_lock, "wg softc lock");
2775 
2776 	if_setsoftc(ifp, sc);
2777 	if_setcapabilities(ifp, WG_CAPS);
2778 	if_setcapenable(ifp, WG_CAPS);
2779 	if_initname(ifp, wgname, ifd->unit);
2780 
2781 	if_setmtu(ifp, DEFAULT_MTU);
2782 	if_setflags(ifp, IFF_NOARP | IFF_MULTICAST);
2783 	if_setinitfn(ifp, wg_init);
2784 	if_setreassignfn(ifp, wg_reassign);
2785 	if_setqflushfn(ifp, wg_qflush);
2786 	if_settransmitfn(ifp, wg_transmit);
2787 	if_setoutputfn(ifp, wg_output);
2788 	if_setioctlfn(ifp, wg_ioctl);
2789 	if_attach(ifp);
2790 	bpfattach(ifp, DLT_NULL, sizeof(uint32_t));
2791 #ifdef INET6
2792 	ND_IFINFO(ifp)->flags &= ~ND6_IFF_AUTO_LINKLOCAL;
2793 	ND_IFINFO(ifp)->flags |= ND6_IFF_NO_DAD;
2794 #endif
2795 	sx_xlock(&wg_sx);
2796 	LIST_INSERT_HEAD(&wg_list, sc, sc_entry);
2797 	sx_xunlock(&wg_sx);
2798 	*ifpp = ifp;
2799 	return (0);
2800 free_aip4:
2801 	RADIX_NODE_HEAD_DESTROY(sc->sc_aip4);
2802 	free(sc->sc_aip4, M_RTABLE);
2803 free_decrypt:
2804 	free(sc->sc_decrypt, M_WG);
2805 	free(sc->sc_encrypt, M_WG);
2806 	noise_local_free(sc->sc_local, NULL);
2807 	free(sc, M_WG);
2808 	return (ENOMEM);
2809 }
2810 
2811 static void
2812 wg_clone_deferred_free(struct noise_local *l)
2813 {
2814 	struct wg_softc *sc = noise_local_arg(l);
2815 
2816 	free(sc, M_WG);
2817 	atomic_add_int(&clone_count, -1);
2818 }
2819 
2820 static int
2821 wg_clone_destroy(struct if_clone *ifc, if_t ifp, uint32_t flags)
2822 {
2823 	struct wg_softc *sc = if_getsoftc(ifp);
2824 	struct ucred *cred;
2825 
2826 	sx_xlock(&wg_sx);
2827 	if_setsoftc(ifp, NULL);
2828 	sx_xlock(&sc->sc_lock);
2829 	sc->sc_flags |= WGF_DYING;
2830 	cred = sc->sc_ucred;
2831 	sc->sc_ucred = NULL;
2832 	sx_xunlock(&sc->sc_lock);
2833 	LIST_REMOVE(sc, sc_entry);
2834 	sx_xunlock(&wg_sx);
2835 
2836 	if_link_state_change(sc->sc_ifp, LINK_STATE_DOWN);
2837 	CURVNET_SET(if_getvnet(sc->sc_ifp));
2838 	if_purgeaddrs(sc->sc_ifp);
2839 	CURVNET_RESTORE();
2840 
2841 	sx_xlock(&sc->sc_lock);
2842 	wg_socket_uninit(sc);
2843 	sx_xunlock(&sc->sc_lock);
2844 
2845 	/*
2846 	 * There is no guarantee that all traffic has passed until the epoch
2847 	 * has elapsed with the socket closed.
2848 	 */
2849 	NET_EPOCH_WAIT();
2850 
2851 	taskqgroup_drain_all(qgroup_wg_tqg);
2852 	sx_xlock(&sc->sc_lock);
2853 	wg_peer_destroy_all(sc);
2854 	NET_EPOCH_DRAIN_CALLBACKS();
2855 	sx_xunlock(&sc->sc_lock);
2856 	sx_destroy(&sc->sc_lock);
2857 	taskqgroup_detach(qgroup_wg_tqg, &sc->sc_handshake);
2858 	for (int i = 0; i < mp_ncpus; i++) {
2859 		taskqgroup_detach(qgroup_wg_tqg, &sc->sc_encrypt[i]);
2860 		taskqgroup_detach(qgroup_wg_tqg, &sc->sc_decrypt[i]);
2861 	}
2862 	free(sc->sc_encrypt, M_WG);
2863 	free(sc->sc_decrypt, M_WG);
2864 	wg_queue_deinit(&sc->sc_handshake_queue);
2865 	wg_queue_deinit(&sc->sc_encrypt_parallel);
2866 	wg_queue_deinit(&sc->sc_decrypt_parallel);
2867 
2868 	RADIX_NODE_HEAD_DESTROY(sc->sc_aip4);
2869 	RADIX_NODE_HEAD_DESTROY(sc->sc_aip6);
2870 	rn_detachhead((void **)&sc->sc_aip4);
2871 	rn_detachhead((void **)&sc->sc_aip6);
2872 
2873 	cookie_checker_free(&sc->sc_cookie);
2874 
2875 	if (cred != NULL)
2876 		crfree(cred);
2877 	bpfdetach(sc->sc_ifp);
2878 	if_detach(sc->sc_ifp);
2879 	if_free(sc->sc_ifp);
2880 
2881 	noise_local_free(sc->sc_local, wg_clone_deferred_free);
2882 
2883 	return (0);
2884 }
2885 
2886 static void
2887 wg_qflush(if_t ifp __unused)
2888 {
2889 }
2890 
2891 /*
2892  * Privileged information (private-key, preshared-key) is only exported to
2893  * root and jailed root by default.
2894  */
2895 static bool
2896 wgc_privileged(struct wg_softc *sc)
2897 {
2898 	struct thread *td;
2899 
2900 	td = curthread;
2901 	return (priv_check(td, PRIV_NET_WG) == 0);
2902 }
2903 
2904 static void
2905 wg_reassign(if_t ifp, struct vnet *new_vnet __unused,
2906     char *unused __unused)
2907 {
2908 	struct wg_softc *sc;
2909 
2910 	sc = if_getsoftc(ifp);
2911 	wg_down(sc);
2912 }
2913 
2914 static void
2915 wg_init(void *xsc)
2916 {
2917 	struct wg_softc *sc;
2918 
2919 	sc = xsc;
2920 	wg_up(sc);
2921 }
2922 
2923 static void
2924 vnet_wg_init(const void *unused __unused)
2925 {
2926 	struct if_clone_addreq req = {
2927 		.create_f = wg_clone_create,
2928 		.destroy_f = wg_clone_destroy,
2929 		.flags = IFC_F_AUTOUNIT,
2930 	};
2931 	V_wg_cloner = ifc_attach_cloner(wgname, &req);
2932 }
2933 VNET_SYSINIT(vnet_wg_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
2934 	     vnet_wg_init, NULL);
2935 
2936 static void
2937 vnet_wg_uninit(const void *unused __unused)
2938 {
2939 	if (V_wg_cloner)
2940 		ifc_detach_cloner(V_wg_cloner);
2941 }
2942 VNET_SYSUNINIT(vnet_wg_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
2943 	       vnet_wg_uninit, NULL);
2944 
2945 static int
2946 wg_prison_remove(void *obj, void *data __unused)
2947 {
2948 	const struct prison *pr = obj;
2949 	struct wg_softc *sc;
2950 
2951 	/*
2952 	 * Do a pass through all if_wg interfaces and release creds on any owned
2953 	 * by the jail that is going away.  This will, in turn, let
2954 	 * the jail die so that we don't end up with Schrödinger's jail.
2955 	 */
2956 	sx_slock(&wg_sx);
2957 	LIST_FOREACH(sc, &wg_list, sc_entry) {
2958 		sx_xlock(&sc->sc_lock);
2959 		if (!(sc->sc_flags & WGF_DYING) && sc->sc_ucred && sc->sc_ucred->cr_prison == pr) {
2960 			struct ucred *cred = sc->sc_ucred;
2961 			DPRINTF(sc, "Creating jail exiting\n");
2962 			DPRINTF(sc, "Creating jail is exiting\n");
2963 			wg_socket_uninit(sc);
2964 			sc->sc_ucred = NULL;
2965 			crfree(cred);
2966 			sc->sc_flags |= WGF_DYING;
2967 		}
2968 		sx_xunlock(&sc->sc_lock);
2969 	}
2970 	sx_sunlock(&wg_sx);
2971 
2972 	return (0);
2973 }
2974 
2975 #ifdef SELFTESTS
2976 #include "selftest/allowedips.c"
2977 static bool wg_run_selftests(void)
2978 {
2979 	bool ret = true;
2980 	ret &= wg_allowedips_selftest();
2981 	ret &= noise_counter_selftest();
2982 	ret &= cookie_selftest();
2983 	return ret;
2984 }
2985 #else
2986 static inline bool wg_run_selftests(void) { return true; }
2987 #endif
2988 
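/*
 * Module load: create the wg_packet UMA zone, bring up the crypto and cookie
 * subsystems, register the jail-removal OSD method and run the optional
 * self-tests.
 */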
2989 static int
2990 wg_module_init(void)
2991 {
2992 	int ret;
2993 	osd_method_t methods[PR_MAXMETHOD] = {
2994 		[PR_METHOD_REMOVE] = wg_prison_remove,
2995 	};
2996 
2997 	if ((wg_packet_zone = uma_zcreate("wg packet", sizeof(struct wg_packet),
2998 	     NULL, NULL, NULL, NULL, 0, 0)) == NULL)
2999 		return (ENOMEM);
3000 	ret = crypto_init();
3001 	if (ret != 0)
3002 		return (ret);
3003 	ret = cookie_init();
3004 	if (ret != 0)
3005 		return (ret);
3006 
3007 	wg_osd_jail_slot = osd_jail_register(NULL, methods);
3008 
3009 	if (!wg_run_selftests())
3010 		return (ENOTRECOVERABLE);
3011 
3012 	return (0);
3013 }
3014 
3015 static void
3016 wg_module_deinit(void)
3017 {
3018 	VNET_ITERATOR_DECL(vnet_iter);
3019 	VNET_LIST_RLOCK();
3020 	VNET_FOREACH(vnet_iter) {
3021 		struct if_clone *clone = VNET_VNET(vnet_iter, wg_cloner);
3022 		if (clone) {
3023 			ifc_detach_cloner(clone);
3024 			VNET_VNET(vnet_iter, wg_cloner) = NULL;
3025 		}
3026 	}
3027 	VNET_LIST_RUNLOCK();
3028 	NET_EPOCH_WAIT();
3029 	MPASS(LIST_EMPTY(&wg_list));
3030 	if (wg_osd_jail_slot != 0)
3031 		osd_jail_deregister(wg_osd_jail_slot);
3032 	cookie_deinit();
3033 	crypto_deinit();
3034 	if (wg_packet_zone != NULL)
3035 		uma_zdestroy(wg_packet_zone);
3036 }
3037 
3038 static int
3039 wg_module_event_handler(module_t mod, int what, void *arg)
3040 {
3041 	switch (what) {
3042 		case MOD_LOAD:
3043 			return wg_module_init();
3044 		case MOD_UNLOAD:
3045 			wg_module_deinit();
3046 			break;
3047 		default:
3048 			return (EOPNOTSUPP);
3049 	}
3050 	return (0);
3051 }
3052 
3053 static moduledata_t wg_moduledata = {
3054 	"if_wg",
3055 	wg_module_event_handler,
3056 	NULL
3057 };
3058 
3059 DECLARE_MODULE(if_wg, wg_moduledata, SI_SUB_PSEUDO, SI_ORDER_ANY);
3060 MODULE_VERSION(if_wg, WIREGUARD_VERSION);
3061 MODULE_DEPEND(if_wg, crypto, 1, 1, 1);
3062