xref: /freebsd/sys/dev/wg/if_wg.c (revision 744bfb213144c63cbaf38d91a1c4f7aebb9b9fbc)
1 /* SPDX-License-Identifier: ISC
2  *
3  * Copyright (C) 2015-2021 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
4  * Copyright (C) 2019-2021 Matt Dunwoodie <ncon@noconroy.net>
5  * Copyright (c) 2019-2020 Rubicon Communications, LLC (Netgate)
6  * Copyright (c) 2021 Kyle Evans <kevans@FreeBSD.org>
7  * Copyright (c) 2022 The FreeBSD Foundation
8  */
9 
10 #include "opt_inet.h"
11 #include "opt_inet6.h"
12 
13 #include <sys/param.h>
14 #include <sys/systm.h>
15 #include <sys/counter.h>
16 #include <sys/gtaskqueue.h>
17 #include <sys/jail.h>
18 #include <sys/kernel.h>
19 #include <sys/lock.h>
20 #include <sys/mbuf.h>
21 #include <sys/module.h>
22 #include <sys/nv.h>
23 #include <sys/priv.h>
24 #include <sys/protosw.h>
25 #include <sys/rmlock.h>
26 #include <sys/rwlock.h>
27 #include <sys/smp.h>
28 #include <sys/socket.h>
29 #include <sys/socketvar.h>
30 #include <sys/sockio.h>
31 #include <sys/sysctl.h>
32 #include <sys/sx.h>
33 #include <machine/_inttypes.h>
34 #include <net/bpf.h>
35 #include <net/ethernet.h>
36 #include <net/if.h>
37 #include <net/if_clone.h>
38 #include <net/if_types.h>
39 #include <net/if_var.h>
40 #include <net/netisr.h>
41 #include <net/radix.h>
42 #include <netinet/in.h>
43 #include <netinet6/in6_var.h>
44 #include <netinet/ip.h>
45 #include <netinet/ip6.h>
46 #include <netinet/ip_icmp.h>
47 #include <netinet/icmp6.h>
48 #include <netinet/udp_var.h>
49 #include <netinet6/nd6.h>
50 
51 #include "support.h"
52 #include "wg_noise.h"
53 #include "wg_cookie.h"
54 #include "version.h"
55 #include "if_wg.h"
56 
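/*
 * The 80 bytes of overhead cover the worst-case outer headers: IPv6 (40) +
 * UDP (8) + WireGuard data header (16) + authentication tag (16).
 */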
57 #define DEFAULT_MTU		(ETHERMTU - 80)
58 #define MAX_MTU			(IF_MAXMTU - 80)
59 
60 #define MAX_STAGED_PKT		128
61 #define MAX_QUEUED_PKT		1024
62 #define MAX_QUEUED_PKT_MASK	(MAX_QUEUED_PKT - 1)
63 
64 #define MAX_QUEUED_HANDSHAKES	4096
65 
66 #define REKEY_TIMEOUT_JITTER	334 /* 1/3 sec, round for arc4random_uniform */
67 #define MAX_TIMER_HANDSHAKES	(90 / REKEY_TIMEOUT)
68 #define NEW_HANDSHAKE_TIMEOUT	(REKEY_TIMEOUT + KEEPALIVE_TIMEOUT)
69 #define UNDERLOAD_TIMEOUT	1
70 
71 #define DPRINTF(sc, ...) if (sc->sc_ifp->if_flags & IFF_DEBUG) if_printf(sc->sc_ifp, ##__VA_ARGS__)
72 
73 /* First byte indicating packet type on the wire */
74 #define WG_PKT_INITIATION htole32(1)
75 #define WG_PKT_RESPONSE htole32(2)
76 #define WG_PKT_COOKIE htole32(3)
77 #define WG_PKT_DATA htole32(4)
78 
79 #define WG_PKT_PADDING		16
80 #define WG_KEY_SIZE		32
81 
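/*
 * Wire-format messages.  The leading 32-bit field 't' holds the little-endian
 * message type (one of the WG_PKT_* values above).
 */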
82 struct wg_pkt_initiation {
83 	uint32_t		t;
84 	uint32_t		s_idx;
85 	uint8_t			ue[NOISE_PUBLIC_KEY_LEN];
86 	uint8_t			es[NOISE_PUBLIC_KEY_LEN + NOISE_AUTHTAG_LEN];
87 	uint8_t			ets[NOISE_TIMESTAMP_LEN + NOISE_AUTHTAG_LEN];
88 	struct cookie_macs	m;
89 };
90 
91 struct wg_pkt_response {
92 	uint32_t		t;
93 	uint32_t		s_idx;
94 	uint32_t		r_idx;
95 	uint8_t			ue[NOISE_PUBLIC_KEY_LEN];
96 	uint8_t			en[0 + NOISE_AUTHTAG_LEN];
97 	struct cookie_macs	m;
98 };
99 
100 struct wg_pkt_cookie {
101 	uint32_t		t;
102 	uint32_t		r_idx;
103 	uint8_t			nonce[COOKIE_NONCE_SIZE];
104 	uint8_t			ec[COOKIE_ENCRYPTED_SIZE];
105 };
106 
107 struct wg_pkt_data {
108 	uint32_t		t;
109 	uint32_t		r_idx;
110 	uint64_t		nonce;
111 	uint8_t			buf[];
112 };
113 
114 struct wg_endpoint {
115 	union {
116 		struct sockaddr		r_sa;
117 		struct sockaddr_in	r_sin;
118 #ifdef INET6
119 		struct sockaddr_in6	r_sin6;
120 #endif
121 	} e_remote;
122 	union {
123 		struct in_addr		l_in;
124 #ifdef INET6
125 		struct in6_pktinfo	l_pktinfo6;
126 #define l_in6 l_pktinfo6.ipi6_addr
127 #endif
128 	} e_local;
129 };
130 
131 struct aip_addr {
132 	uint8_t		length;
133 	union {
134 		uint8_t		bytes[16];
135 		uint32_t	ip;
136 		uint32_t	ip6[4];
137 		struct in_addr	in;
138 		struct in6_addr	in6;
139 	};
140 };
141 
142 struct wg_aip {
143 	struct radix_node	 a_nodes[2];
144 	LIST_ENTRY(wg_aip)	 a_entry;
145 	struct aip_addr		 a_addr;
146 	struct aip_addr		 a_mask;
147 	struct wg_peer		*a_peer;
148 	sa_family_t		 a_af;
149 };
150 
151 struct wg_packet {
152 	STAILQ_ENTRY(wg_packet)	 p_serial;
153 	STAILQ_ENTRY(wg_packet)	 p_parallel;
154 	struct wg_endpoint	 p_endpoint;
155 	struct noise_keypair	*p_keypair;
156 	uint64_t		 p_nonce;
157 	struct mbuf		*p_mbuf;
158 	int			 p_mtu;
159 	sa_family_t		 p_af;
160 	enum wg_ring_state {
161 		WG_PACKET_UNCRYPTED,
162 		WG_PACKET_CRYPTED,
163 		WG_PACKET_DEAD,
164 	}			 p_state;
165 };
166 
167 STAILQ_HEAD(wg_packet_list, wg_packet);
168 
169 struct wg_queue {
170 	struct mtx		 q_mtx;
171 	struct wg_packet_list	 q_queue;
172 	size_t			 q_len;
173 };
174 
175 struct wg_peer {
176 	TAILQ_ENTRY(wg_peer)		 p_entry;
177 	uint64_t			 p_id;
178 	struct wg_softc			*p_sc;
179 
180 	struct noise_remote		*p_remote;
181 	struct cookie_maker		 p_cookie;
182 
183 	struct rwlock			 p_endpoint_lock;
184 	struct wg_endpoint		 p_endpoint;
185 
186 	struct wg_queue	 		 p_stage_queue;
187 	struct wg_queue	 		 p_encrypt_serial;
188 	struct wg_queue	 		 p_decrypt_serial;
189 
190 	bool				 p_enabled;
191 	bool				 p_need_another_keepalive;
192 	uint16_t			 p_persistent_keepalive_interval;
193 	struct callout			 p_new_handshake;
194 	struct callout			 p_send_keepalive;
195 	struct callout			 p_retry_handshake;
196 	struct callout			 p_zero_key_material;
197 	struct callout			 p_persistent_keepalive;
198 
199 	struct mtx			 p_handshake_mtx;
200 	struct timespec			 p_handshake_complete;	/* nanotime */
201 	int				 p_handshake_retries;
202 
203 	struct grouptask		 p_send;
204 	struct grouptask		 p_recv;
205 
206 	counter_u64_t			 p_tx_bytes;
207 	counter_u64_t			 p_rx_bytes;
208 
209 	LIST_HEAD(, wg_aip)		 p_aips;
210 	size_t				 p_aips_num;
211 };
212 
213 struct wg_socket {
214 	struct socket	*so_so4;
215 	struct socket	*so_so6;
216 	uint32_t	 so_user_cookie;
217 	int		 so_fibnum;
218 	in_port_t	 so_port;
219 };
220 
221 struct wg_softc {
222 	LIST_ENTRY(wg_softc)	 sc_entry;
223 	struct ifnet		*sc_ifp;
224 	int			 sc_flags;
225 
226 	struct ucred		*sc_ucred;
227 	struct wg_socket	 sc_socket;
228 
229 	TAILQ_HEAD(,wg_peer)	 sc_peers;
230 	size_t			 sc_peers_num;
231 
232 	struct noise_local	*sc_local;
233 	struct cookie_checker	 sc_cookie;
234 
235 	struct radix_node_head	*sc_aip4;
236 	struct radix_node_head	*sc_aip6;
237 
238 	struct grouptask	 sc_handshake;
239 	struct wg_queue		 sc_handshake_queue;
240 
241 	struct grouptask	*sc_encrypt;
242 	struct grouptask	*sc_decrypt;
243 	struct wg_queue		 sc_encrypt_parallel;
244 	struct wg_queue		 sc_decrypt_parallel;
245 	u_int			 sc_encrypt_last_cpu;
246 	u_int			 sc_decrypt_last_cpu;
247 
248 	struct sx		 sc_lock;
249 };
250 
251 #define	WGF_DYING	0x0001
252 
253 #define MAX_LOOPS	8
254 #define MTAG_WGLOOP	0x77676c70 /* wglp */
255 #ifndef ENOKEY
256 #define	ENOKEY	ENOTCAPABLE
257 #endif
258 
259 #define	GROUPTASK_DRAIN(gtask)			\
260 	gtaskqueue_drain((gtask)->gt_taskqueue, &(gtask)->gt_task)
261 
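/* Tap a packet to bpf(4), prepending the address family as a 32-bit header. */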
262 #define BPF_MTAP2_AF(ifp, m, af) do { \
263 		uint32_t __bpf_tap_af = (af); \
264 		BPF_MTAP2(ifp, &__bpf_tap_af, sizeof(__bpf_tap_af), m); \
265 	} while (0)
266 
267 static int clone_count;
268 static uma_zone_t wg_packet_zone;
269 static volatile unsigned long peer_counter = 0;
270 static const char wgname[] = "wg";
271 static unsigned wg_osd_jail_slot;
272 
273 static struct sx wg_sx;
274 SX_SYSINIT(wg_sx, &wg_sx, "wg_sx");
275 
276 static LIST_HEAD(, wg_softc) wg_list = LIST_HEAD_INITIALIZER(wg_list);
277 
278 static TASKQGROUP_DEFINE(wg_tqg, mp_ncpus, 1);
279 
280 MALLOC_DEFINE(M_WG, "WG", "wireguard");
281 
282 VNET_DEFINE_STATIC(struct if_clone *, wg_cloner);
283 
284 #define	V_wg_cloner	VNET(wg_cloner)
285 #define	WG_CAPS		IFCAP_LINKSTATE
286 
287 struct wg_timespec64 {
288 	uint64_t	tv_sec;
289 	uint64_t	tv_nsec;
290 };
291 
292 static int wg_socket_init(struct wg_softc *, in_port_t);
293 static int wg_socket_bind(struct socket **, struct socket **, in_port_t *);
294 static void wg_socket_set(struct wg_softc *, struct socket *, struct socket *);
295 static void wg_socket_uninit(struct wg_softc *);
296 static int wg_socket_set_sockopt(struct socket *, struct socket *, int, void *, size_t);
297 static int wg_socket_set_cookie(struct wg_softc *, uint32_t);
298 static int wg_socket_set_fibnum(struct wg_softc *, int);
299 static int wg_send(struct wg_softc *, struct wg_endpoint *, struct mbuf *);
300 static void wg_timers_enable(struct wg_peer *);
301 static void wg_timers_disable(struct wg_peer *);
302 static void wg_timers_set_persistent_keepalive(struct wg_peer *, uint16_t);
303 static void wg_timers_get_last_handshake(struct wg_peer *, struct wg_timespec64 *);
304 static void wg_timers_event_data_sent(struct wg_peer *);
305 static void wg_timers_event_data_received(struct wg_peer *);
306 static void wg_timers_event_any_authenticated_packet_sent(struct wg_peer *);
307 static void wg_timers_event_any_authenticated_packet_received(struct wg_peer *);
308 static void wg_timers_event_any_authenticated_packet_traversal(struct wg_peer *);
309 static void wg_timers_event_handshake_initiated(struct wg_peer *);
310 static void wg_timers_event_handshake_complete(struct wg_peer *);
311 static void wg_timers_event_session_derived(struct wg_peer *);
312 static void wg_timers_event_want_initiation(struct wg_peer *);
313 static void wg_timers_run_send_initiation(struct wg_peer *, bool);
314 static void wg_timers_run_retry_handshake(void *);
315 static void wg_timers_run_send_keepalive(void *);
316 static void wg_timers_run_new_handshake(void *);
317 static void wg_timers_run_zero_key_material(void *);
318 static void wg_timers_run_persistent_keepalive(void *);
319 static int wg_aip_add(struct wg_softc *, struct wg_peer *, sa_family_t, const void *, uint8_t);
320 static struct wg_peer *wg_aip_lookup(struct wg_softc *, sa_family_t, void *);
321 static void wg_aip_remove_all(struct wg_softc *, struct wg_peer *);
322 static struct wg_peer *wg_peer_alloc(struct wg_softc *, const uint8_t [WG_KEY_SIZE]);
323 static void wg_peer_free_deferred(struct noise_remote *);
324 static void wg_peer_destroy(struct wg_peer *);
325 static void wg_peer_destroy_all(struct wg_softc *);
326 static void wg_peer_send_buf(struct wg_peer *, uint8_t *, size_t);
327 static void wg_send_initiation(struct wg_peer *);
328 static void wg_send_response(struct wg_peer *);
329 static void wg_send_cookie(struct wg_softc *, struct cookie_macs *, uint32_t, struct wg_endpoint *);
330 static void wg_peer_set_endpoint(struct wg_peer *, struct wg_endpoint *);
331 static void wg_peer_clear_src(struct wg_peer *);
332 static void wg_peer_get_endpoint(struct wg_peer *, struct wg_endpoint *);
333 static void wg_send_buf(struct wg_softc *, struct wg_endpoint *, uint8_t *, size_t);
334 static void wg_send_keepalive(struct wg_peer *);
335 static void wg_handshake(struct wg_softc *, struct wg_packet *);
336 static void wg_encrypt(struct wg_softc *, struct wg_packet *);
337 static void wg_decrypt(struct wg_softc *, struct wg_packet *);
338 static void wg_softc_handshake_receive(struct wg_softc *);
339 static void wg_softc_decrypt(struct wg_softc *);
340 static void wg_softc_encrypt(struct wg_softc *);
341 static void wg_encrypt_dispatch(struct wg_softc *);
342 static void wg_decrypt_dispatch(struct wg_softc *);
343 static void wg_deliver_out(struct wg_peer *);
344 static void wg_deliver_in(struct wg_peer *);
345 static struct wg_packet *wg_packet_alloc(struct mbuf *);
346 static void wg_packet_free(struct wg_packet *);
347 static void wg_queue_init(struct wg_queue *, const char *);
348 static void wg_queue_deinit(struct wg_queue *);
349 static size_t wg_queue_len(struct wg_queue *);
350 static int wg_queue_enqueue_handshake(struct wg_queue *, struct wg_packet *);
351 static struct wg_packet *wg_queue_dequeue_handshake(struct wg_queue *);
352 static void wg_queue_push_staged(struct wg_queue *, struct wg_packet *);
353 static void wg_queue_enlist_staged(struct wg_queue *, struct wg_packet_list *);
354 static void wg_queue_delist_staged(struct wg_queue *, struct wg_packet_list *);
355 static void wg_queue_purge(struct wg_queue *);
356 static int wg_queue_both(struct wg_queue *, struct wg_queue *, struct wg_packet *);
357 static struct wg_packet *wg_queue_dequeue_serial(struct wg_queue *);
358 static struct wg_packet *wg_queue_dequeue_parallel(struct wg_queue *);
359 static bool wg_input(struct mbuf *, int, struct inpcb *, const struct sockaddr *, void *);
360 static void wg_peer_send_staged(struct wg_peer *);
361 static int wg_clone_create(struct if_clone *, int, caddr_t);
362 static void wg_qflush(struct ifnet *);
363 static inline int determine_af_and_pullup(struct mbuf **m, sa_family_t *af);
364 static int wg_xmit(struct ifnet *, struct mbuf *, sa_family_t, uint32_t);
365 static int wg_transmit(struct ifnet *, struct mbuf *);
366 static int wg_output(struct ifnet *, struct mbuf *, const struct sockaddr *, struct route *);
367 static void wg_clone_destroy(struct ifnet *);
368 static bool wgc_privileged(struct wg_softc *);
369 static int wgc_get(struct wg_softc *, struct wg_data_io *);
370 static int wgc_set(struct wg_softc *, struct wg_data_io *);
371 static int wg_up(struct wg_softc *);
372 static void wg_down(struct wg_softc *);
373 static void wg_reassign(struct ifnet *, struct vnet *, char *unused);
374 static void wg_init(void *);
375 static int wg_ioctl(struct ifnet *, u_long, caddr_t);
376 static void vnet_wg_init(const void *);
377 static void vnet_wg_uninit(const void *);
378 static int wg_module_init(void);
379 static void wg_module_deinit(void);
380 
381 /* TODO Peer */
382 static struct wg_peer *
383 wg_peer_alloc(struct wg_softc *sc, const uint8_t pub_key[WG_KEY_SIZE])
384 {
385 	struct wg_peer *peer;
386 
387 	sx_assert(&sc->sc_lock, SX_XLOCKED);
388 
389 	peer = malloc(sizeof(*peer), M_WG, M_WAITOK | M_ZERO);
390 	peer->p_remote = noise_remote_alloc(sc->sc_local, peer, pub_key);
391 	peer->p_tx_bytes = counter_u64_alloc(M_WAITOK);
392 	peer->p_rx_bytes = counter_u64_alloc(M_WAITOK);
393 	peer->p_id = peer_counter++;
394 	peer->p_sc = sc;
395 
396 	cookie_maker_init(&peer->p_cookie, pub_key);
397 
398 	rw_init(&peer->p_endpoint_lock, "wg_peer_endpoint");
399 
400 	wg_queue_init(&peer->p_stage_queue, "stageq");
401 	wg_queue_init(&peer->p_encrypt_serial, "txq");
402 	wg_queue_init(&peer->p_decrypt_serial, "rxq");
403 
404 	peer->p_enabled = false;
405 	peer->p_need_another_keepalive = false;
406 	peer->p_persistent_keepalive_interval = 0;
407 	callout_init(&peer->p_new_handshake, true);
408 	callout_init(&peer->p_send_keepalive, true);
409 	callout_init(&peer->p_retry_handshake, true);
410 	callout_init(&peer->p_persistent_keepalive, true);
411 	callout_init(&peer->p_zero_key_material, true);
412 
413 	mtx_init(&peer->p_handshake_mtx, "peer handshake", NULL, MTX_DEF);
414 	bzero(&peer->p_handshake_complete, sizeof(peer->p_handshake_complete));
415 	peer->p_handshake_retries = 0;
416 
417 	GROUPTASK_INIT(&peer->p_send, 0, (gtask_fn_t *)wg_deliver_out, peer);
418 	taskqgroup_attach(qgroup_wg_tqg, &peer->p_send, peer, NULL, NULL, "wg send");
419 	GROUPTASK_INIT(&peer->p_recv, 0, (gtask_fn_t *)wg_deliver_in, peer);
420 	taskqgroup_attach(qgroup_wg_tqg, &peer->p_recv, peer, NULL, NULL, "wg recv");
421 
422 	LIST_INIT(&peer->p_aips);
423 	peer->p_aips_num = 0;
424 
425 	return (peer);
426 }
427 
428 static void
429 wg_peer_free_deferred(struct noise_remote *r)
430 {
431 	struct wg_peer *peer = noise_remote_arg(r);
432 
433 	/* While there are no references remaining, we may still have
434 	 * p_{send,recv} executing (think empty queue, but wg_deliver_{in,out}
435 	 * needs to check the queue). We should wait for them and then free. */
436 	GROUPTASK_DRAIN(&peer->p_recv);
437 	GROUPTASK_DRAIN(&peer->p_send);
438 	taskqgroup_detach(qgroup_wg_tqg, &peer->p_recv);
439 	taskqgroup_detach(qgroup_wg_tqg, &peer->p_send);
440 
441 	wg_queue_deinit(&peer->p_decrypt_serial);
442 	wg_queue_deinit(&peer->p_encrypt_serial);
443 	wg_queue_deinit(&peer->p_stage_queue);
444 
445 	counter_u64_free(peer->p_tx_bytes);
446 	counter_u64_free(peer->p_rx_bytes);
447 	rw_destroy(&peer->p_endpoint_lock);
448 	mtx_destroy(&peer->p_handshake_mtx);
449 
450 	cookie_maker_free(&peer->p_cookie);
451 
452 	free(peer, M_WG);
453 }
454 
455 static void
456 wg_peer_destroy(struct wg_peer *peer)
457 {
458 	struct wg_softc *sc = peer->p_sc;
459 	sx_assert(&sc->sc_lock, SX_XLOCKED);
460 
461 	/* Disable remote and timers. This will prevent any new handshakes
462 	 * occurring. */
463 	noise_remote_disable(peer->p_remote);
464 	wg_timers_disable(peer);
465 
466 	/* Now we can remove all allowed IPs so no more packets will be routed
467 	 * to the peer. */
468 	wg_aip_remove_all(sc, peer);
469 
470 	/* Remove peer from the interface, then free. Some references may still
471 	 * exist to p_remote, so noise_remote_free will wait until they're all
472 	 * exist to p_remote, so noise_remote_free will wait until they have all
473 	 * been dropped (put) before calling wg_peer_free_deferred. */
474 	TAILQ_REMOVE(&sc->sc_peers, peer, p_entry);
475 	DPRINTF(sc, "Peer %" PRIu64 " destroyed\n", peer->p_id);
476 	noise_remote_free(peer->p_remote, wg_peer_free_deferred);
477 }
478 
479 static void
480 wg_peer_destroy_all(struct wg_softc *sc)
481 {
482 	struct wg_peer *peer, *tpeer;
483 	TAILQ_FOREACH_SAFE(peer, &sc->sc_peers, p_entry, tpeer)
484 		wg_peer_destroy(peer);
485 }
486 
487 static void
488 wg_peer_set_endpoint(struct wg_peer *peer, struct wg_endpoint *e)
489 {
490 	MPASS(e->e_remote.r_sa.sa_family != 0);
491 	if (memcmp(e, &peer->p_endpoint, sizeof(*e)) == 0)
492 		return;
493 
494 	rw_wlock(&peer->p_endpoint_lock);
495 	peer->p_endpoint = *e;
496 	rw_wunlock(&peer->p_endpoint_lock);
497 }
498 
499 static void
500 wg_peer_clear_src(struct wg_peer *peer)
501 {
502 	rw_wlock(&peer->p_endpoint_lock);
503 	bzero(&peer->p_endpoint.e_local, sizeof(peer->p_endpoint.e_local));
504 	rw_wunlock(&peer->p_endpoint_lock);
505 }
506 
507 static void
508 wg_peer_get_endpoint(struct wg_peer *peer, struct wg_endpoint *e)
509 {
510 	rw_rlock(&peer->p_endpoint_lock);
511 	*e = peer->p_endpoint;
512 	rw_runlock(&peer->p_endpoint_lock);
513 }
514 
515 /* Allowed IP */
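/*
 * Insert an address/cidr prefix into the per-family radix tree.  If the prefix
 * is already present, it is reassigned from its current owner to 'peer'.
 */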
516 static int
517 wg_aip_add(struct wg_softc *sc, struct wg_peer *peer, sa_family_t af, const void *addr, uint8_t cidr)
518 {
519 	struct radix_node_head	*root;
520 	struct radix_node	*node;
521 	struct wg_aip		*aip;
522 	int			 ret = 0;
523 
524 	aip = malloc(sizeof(*aip), M_WG, M_WAITOK | M_ZERO);
525 	aip->a_peer = peer;
526 	aip->a_af = af;
527 
528 	switch (af) {
529 #ifdef INET
530 	case AF_INET:
531 		if (cidr > 32) cidr = 32;
532 		root = sc->sc_aip4;
533 		aip->a_addr.in = *(const struct in_addr *)addr;
534 		aip->a_mask.ip = htonl(~((1LL << (32 - cidr)) - 1) & 0xffffffff);
535 		aip->a_addr.ip &= aip->a_mask.ip;
536 		aip->a_addr.length = aip->a_mask.length = offsetof(struct aip_addr, in) + sizeof(struct in_addr);
537 		break;
538 #endif
539 #ifdef INET6
540 	case AF_INET6:
541 		if (cidr > 128) cidr = 128;
542 		root = sc->sc_aip6;
543 		aip->a_addr.in6 = *(const struct in6_addr *)addr;
544 		in6_prefixlen2mask(&aip->a_mask.in6, cidr);
545 		for (int i = 0; i < 4; i++)
546 			aip->a_addr.ip6[i] &= aip->a_mask.ip6[i];
547 		aip->a_addr.length = aip->a_mask.length = offsetof(struct aip_addr, in6) + sizeof(struct in6_addr);
548 		break;
549 #endif
550 	default:
551 		free(aip, M_WG);
552 		return (EAFNOSUPPORT);
553 	}
554 
555 	RADIX_NODE_HEAD_LOCK(root);
556 	node = root->rnh_addaddr(&aip->a_addr, &aip->a_mask, &root->rh, aip->a_nodes);
557 	if (node == aip->a_nodes) {
558 		LIST_INSERT_HEAD(&peer->p_aips, aip, a_entry);
559 		peer->p_aips_num++;
560 	} else if (!node)
561 		node = root->rnh_lookup(&aip->a_addr, &aip->a_mask, &root->rh);
562 	if (!node) {
563 		free(aip, M_WG);
564 		return (ENOMEM);
565 	} else if (node != aip->a_nodes) {
566 		free(aip, M_WG);
567 		aip = (struct wg_aip *)node;
568 		if (aip->a_peer != peer) {
569 			LIST_REMOVE(aip, a_entry);
570 			aip->a_peer->p_aips_num--;
571 			aip->a_peer = peer;
572 			LIST_INSERT_HEAD(&peer->p_aips, aip, a_entry);
573 			aip->a_peer->p_aips_num++;
574 		}
575 	}
576 	RADIX_NODE_HEAD_UNLOCK(root);
577 	return (ret);
578 }
579 
580 static struct wg_peer *
581 wg_aip_lookup(struct wg_softc *sc, sa_family_t af, void *a)
582 {
583 	struct radix_node_head	*root;
584 	struct radix_node	*node;
585 	struct wg_peer		*peer;
586 	struct aip_addr		 addr;
587 	RADIX_NODE_HEAD_RLOCK_TRACKER;
588 
589 	switch (af) {
590 	case AF_INET:
591 		root = sc->sc_aip4;
592 		memcpy(&addr.in, a, sizeof(addr.in));
593 		addr.length = offsetof(struct aip_addr, in) + sizeof(struct in_addr);
594 		break;
595 	case AF_INET6:
596 		root = sc->sc_aip6;
597 		memcpy(&addr.in6, a, sizeof(addr.in6));
598 		addr.length = offsetof(struct aip_addr, in6) + sizeof(struct in6_addr);
599 		break;
600 	default:
601 		return NULL;
602 	}
603 
604 	RADIX_NODE_HEAD_RLOCK(root);
605 	node = root->rnh_matchaddr(&addr, &root->rh);
606 	if (node != NULL) {
607 		peer = ((struct wg_aip *)node)->a_peer;
608 		noise_remote_ref(peer->p_remote);
609 	} else {
610 		peer = NULL;
611 	}
612 	RADIX_NODE_HEAD_RUNLOCK(root);
613 
614 	return (peer);
615 }
616 
617 static void
618 wg_aip_remove_all(struct wg_softc *sc, struct wg_peer *peer)
619 {
620 	struct wg_aip		*aip, *taip;
621 
622 	RADIX_NODE_HEAD_LOCK(sc->sc_aip4);
623 	LIST_FOREACH_SAFE(aip, &peer->p_aips, a_entry, taip) {
624 		if (aip->a_af == AF_INET) {
625 			if (sc->sc_aip4->rnh_deladdr(&aip->a_addr, &aip->a_mask, &sc->sc_aip4->rh) == NULL)
626 				panic("failed to delete aip %p", aip);
627 			LIST_REMOVE(aip, a_entry);
628 			peer->p_aips_num--;
629 			free(aip, M_WG);
630 		}
631 	}
632 	RADIX_NODE_HEAD_UNLOCK(sc->sc_aip4);
633 
634 	RADIX_NODE_HEAD_LOCK(sc->sc_aip6);
635 	LIST_FOREACH_SAFE(aip, &peer->p_aips, a_entry, taip) {
636 		if (aip->a_af == AF_INET6) {
637 			if (sc->sc_aip6->rnh_deladdr(&aip->a_addr, &aip->a_mask, &sc->sc_aip6->rh) == NULL)
638 				panic("failed to delete aip %p", aip);
639 			LIST_REMOVE(aip, a_entry);
640 			peer->p_aips_num--;
641 			free(aip, M_WG);
642 		}
643 	}
644 	RADIX_NODE_HEAD_UNLOCK(sc->sc_aip6);
645 
646 	if (!LIST_EMPTY(&peer->p_aips) || peer->p_aips_num != 0)
647 		panic("wg_aip_remove_all could not delete all %p", peer);
648 }
649 
650 static int
651 wg_socket_init(struct wg_softc *sc, in_port_t port)
652 {
653 	struct ucred *cred = sc->sc_ucred;
654 	struct socket *so4 = NULL, *so6 = NULL;
655 	int rc;
656 
657 	sx_assert(&sc->sc_lock, SX_XLOCKED);
658 
659 	if (!cred)
660 		return (EBUSY);
661 
662 	/*
663 	 * For socket creation, we use the creds of the thread that created the
664 	 * tunnel rather than the current thread to maintain the semantics that
665 	 * WireGuard has on Linux with network namespaces -- that the sockets
666 	 * are created in their home vnet so that they can be configured and
667 	 * functionally attached to a foreign vnet as the jail's only interface
668 	 * to the network.
669 	 */
670 #ifdef INET
671 	rc = socreate(AF_INET, &so4, SOCK_DGRAM, IPPROTO_UDP, cred, curthread);
672 	if (rc)
673 		goto out;
674 
675 	rc = udp_set_kernel_tunneling(so4, wg_input, NULL, sc);
676 	/*
677 	 * udp_set_kernel_tunneling can only fail if there is already a tunneling function set.
678 	 * This should never happen with a new socket.
679 	 */
680 	MPASS(rc == 0);
681 #endif
682 
683 #ifdef INET6
684 	rc = socreate(AF_INET6, &so6, SOCK_DGRAM, IPPROTO_UDP, cred, curthread);
685 	if (rc)
686 		goto out;
687 	rc = udp_set_kernel_tunneling(so6, wg_input, NULL, sc);
688 	MPASS(rc == 0);
689 #endif
690 
691 	if (sc->sc_socket.so_user_cookie) {
692 		rc = wg_socket_set_sockopt(so4, so6, SO_USER_COOKIE, &sc->sc_socket.so_user_cookie, sizeof(sc->sc_socket.so_user_cookie));
693 		if (rc)
694 			goto out;
695 	}
696 	rc = wg_socket_set_sockopt(so4, so6, SO_SETFIB, &sc->sc_socket.so_fibnum, sizeof(sc->sc_socket.so_fibnum));
697 	if (rc)
698 		goto out;
699 
700 	rc = wg_socket_bind(&so4, &so6, &port);
701 	if (!rc) {
702 		sc->sc_socket.so_port = port;
703 		wg_socket_set(sc, so4, so6);
704 	}
705 out:
706 	if (rc) {
707 		if (so4 != NULL)
708 			soclose(so4);
709 		if (so6 != NULL)
710 			soclose(so6);
711 	}
712 	return (rc);
713 }
714 
715 static int wg_socket_set_sockopt(struct socket *so4, struct socket *so6, int name, void *val, size_t len)
716 {
717 	int ret4 = 0, ret6 = 0;
718 	struct sockopt sopt = {
719 		.sopt_dir = SOPT_SET,
720 		.sopt_level = SOL_SOCKET,
721 		.sopt_name = name,
722 		.sopt_val = val,
723 		.sopt_valsize = len
724 	};
725 
726 	if (so4)
727 		ret4 = sosetopt(so4, &sopt);
728 	if (so6)
729 		ret6 = sosetopt(so6, &sopt);
730 	return (ret4 ?: ret6);
731 }
732 
733 static int wg_socket_set_cookie(struct wg_softc *sc, uint32_t user_cookie)
734 {
735 	struct wg_socket *so = &sc->sc_socket;
736 	int ret;
737 
738 	sx_assert(&sc->sc_lock, SX_XLOCKED);
739 	ret = wg_socket_set_sockopt(so->so_so4, so->so_so6, SO_USER_COOKIE, &user_cookie, sizeof(user_cookie));
740 	if (!ret)
741 		so->so_user_cookie = user_cookie;
742 	return (ret);
743 }
744 
745 static int wg_socket_set_fibnum(struct wg_softc *sc, int fibnum)
746 {
747 	struct wg_socket *so = &sc->sc_socket;
748 	int ret;
749 
750 	sx_assert(&sc->sc_lock, SX_XLOCKED);
751 
752 	ret = wg_socket_set_sockopt(so->so_so4, so->so_so6, SO_SETFIB, &fibnum, sizeof(fibnum));
753 	if (!ret)
754 		so->so_fibnum = fibnum;
755 	return (ret);
756 }
757 
758 static void
759 wg_socket_uninit(struct wg_softc *sc)
760 {
761 	wg_socket_set(sc, NULL, NULL);
762 }
763 
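/*
 * Publish the new sockets, then wait out the current net epoch so that any
 * concurrent senders still holding the old pointers finish before the old
 * sockets are closed.
 */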
764 static void
765 wg_socket_set(struct wg_softc *sc, struct socket *new_so4, struct socket *new_so6)
766 {
767 	struct wg_socket *so = &sc->sc_socket;
768 	struct socket *so4, *so6;
769 
770 	sx_assert(&sc->sc_lock, SX_XLOCKED);
771 
772 	so4 = ck_pr_load_ptr(&so->so_so4);
773 	so6 = ck_pr_load_ptr(&so->so_so6);
774 	ck_pr_store_ptr(&so->so_so4, new_so4);
775 	ck_pr_store_ptr(&so->so_so6, new_so6);
776 
777 	if (!so4 && !so6)
778 		return;
779 	NET_EPOCH_WAIT();
780 	if (so4)
781 		soclose(so4);
782 	if (so6)
783 		soclose(so6);
784 }
785 
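/*
 * Bind both sockets to the same UDP port.  If an ephemeral port was requested,
 * the port obtained by the first successful bind is reused for the second
 * socket; EADDRNOTAVAIL on one address family is tolerated as long as the
 * other family binds.
 */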
786 static int
787 wg_socket_bind(struct socket **in_so4, struct socket **in_so6, in_port_t *requested_port)
788 {
789 	struct socket *so4 = *in_so4, *so6 = *in_so6;
790 	int ret4 = 0, ret6 = 0;
791 	in_port_t port = *requested_port;
792 	struct sockaddr_in sin = {
793 		.sin_len = sizeof(struct sockaddr_in),
794 		.sin_family = AF_INET,
795 		.sin_port = htons(port)
796 	};
797 	struct sockaddr_in6 sin6 = {
798 		.sin6_len = sizeof(struct sockaddr_in6),
799 		.sin6_family = AF_INET6,
800 		.sin6_port = htons(port)
801 	};
802 
803 	if (so4) {
804 		ret4 = sobind(so4, (struct sockaddr *)&sin, curthread);
805 		if (ret4 && ret4 != EADDRNOTAVAIL)
806 			return (ret4);
807 		if (!ret4 && !sin.sin_port) {
808 			struct sockaddr_in *bound_sin;
809 			int ret = so4->so_proto->pr_sockaddr(so4,
810 			    (struct sockaddr **)&bound_sin);
811 			if (ret)
812 				return (ret);
813 			port = ntohs(bound_sin->sin_port);
814 			sin6.sin6_port = bound_sin->sin_port;
815 			free(bound_sin, M_SONAME);
816 		}
817 	}
818 
819 	if (so6) {
820 		ret6 = sobind(so6, (struct sockaddr *)&sin6, curthread);
821 		if (ret6 && ret6 != EADDRNOTAVAIL)
822 			return (ret6);
823 		if (!ret6 && !sin6.sin6_port) {
824 			struct sockaddr_in6 *bound_sin6;
825 			int ret = so6->so_proto->pr_sockaddr(so6,
826 			    (struct sockaddr **)&bound_sin6);
827 			if (ret)
828 				return (ret);
829 			port = ntohs(bound_sin6->sin6_port);
830 			free(bound_sin6, M_SONAME);
831 		}
832 	}
833 
834 	if (ret4 && ret6)
835 		return (ret4);
836 	*requested_port = port;
837 	if (ret4 && !ret6 && so4) {
838 		soclose(so4);
839 		*in_so4 = NULL;
840 	} else if (ret6 && !ret4 && so6) {
841 		soclose(so6);
842 		*in_so6 = NULL;
843 	}
844 	return (0);
845 }
846 
847 static int
848 wg_send(struct wg_softc *sc, struct wg_endpoint *e, struct mbuf *m)
849 {
850 	struct epoch_tracker et;
851 	struct sockaddr *sa;
852 	struct wg_socket *so = &sc->sc_socket;
853 	struct socket *so4, *so6;
854 	struct mbuf *control = NULL;
855 	int ret = 0;
856 	size_t len = m->m_pkthdr.len;
857 
858 	/* Get local control address before locking */
859 	if (e->e_remote.r_sa.sa_family == AF_INET) {
860 		if (e->e_local.l_in.s_addr != INADDR_ANY)
861 			control = sbcreatecontrol((caddr_t)&e->e_local.l_in,
862 			    sizeof(struct in_addr), IP_SENDSRCADDR,
863 			    IPPROTO_IP, M_NOWAIT);
864 #ifdef INET6
865 	} else if (e->e_remote.r_sa.sa_family == AF_INET6) {
866 		if (!IN6_IS_ADDR_UNSPECIFIED(&e->e_local.l_in6))
867 			control = sbcreatecontrol((caddr_t)&e->e_local.l_pktinfo6,
868 			    sizeof(struct in6_pktinfo), IPV6_PKTINFO,
869 			    IPPROTO_IPV6, M_NOWAIT);
870 #endif
871 	} else {
872 		m_freem(m);
873 		return (EAFNOSUPPORT);
874 	}
875 
876 	/* Get remote address */
877 	sa = &e->e_remote.r_sa;
878 
879 	NET_EPOCH_ENTER(et);
880 	so4 = ck_pr_load_ptr(&so->so_so4);
881 	so6 = ck_pr_load_ptr(&so->so_so6);
882 	if (e->e_remote.r_sa.sa_family == AF_INET && so4 != NULL)
883 		ret = sosend(so4, sa, NULL, m, control, 0, curthread);
884 	else if (e->e_remote.r_sa.sa_family == AF_INET6 && so6 != NULL)
885 		ret = sosend(so6, sa, NULL, m, control, 0, curthread);
886 	else {
887 		ret = ENOTCONN;
888 		m_freem(control);
889 		m_freem(m);
890 	}
891 	NET_EPOCH_EXIT(et);
892 	if (ret == 0) {
893 		if_inc_counter(sc->sc_ifp, IFCOUNTER_OPACKETS, 1);
894 		if_inc_counter(sc->sc_ifp, IFCOUNTER_OBYTES, len);
895 	}
896 	return (ret);
897 }
898 
899 static void
900 wg_send_buf(struct wg_softc *sc, struct wg_endpoint *e, uint8_t *buf, size_t len)
901 {
902 	struct mbuf	*m;
903 	int		 ret = 0;
904 	bool		 retried = false;
905 
906 retry:
907 	m = m_get2(len, M_NOWAIT, MT_DATA, M_PKTHDR);
908 	if (!m) {
909 		ret = ENOMEM;
910 		goto out;
911 	}
912 	m_copyback(m, 0, len, buf);
913 
914 	if (ret == 0) {
915 		ret = wg_send(sc, e, m);
916 		/* Retry if we couldn't bind to e->e_local */
917 		if (ret == EADDRNOTAVAIL && !retried) {
918 			bzero(&e->e_local, sizeof(e->e_local));
919 			retried = true;
920 			goto retry;
921 		}
922 	} else {
923 		ret = wg_send(sc, e, m);
924 	}
925 out:
926 	if (ret)
927 		DPRINTF(sc, "Unable to send packet: %d\n", ret);
928 }
929 
930 /* Timers */
931 static void
932 wg_timers_enable(struct wg_peer *peer)
933 {
934 	ck_pr_store_bool(&peer->p_enabled, true);
935 	wg_timers_run_persistent_keepalive(peer);
936 }
937 
938 static void
939 wg_timers_disable(struct wg_peer *peer)
940 {
941 	/* By setting p_enabled = false, then calling NET_EPOCH_WAIT, we can be
942 	 * sure no new handshakes are created after the wait. This is because
943 	 * all callout_resets (scheduling the callout) are guarded by
944 	 * p_enabled. We can be sure all sections that read p_enabled and then
945 	 * optionally call callout_reset are finished as they are surrounded by
946 	 * NET_EPOCH_{ENTER,EXIT}.
947 	 *
948 	 * However, as new callouts may be scheduled during NET_EPOCH_WAIT (but
949 	 * not after), we stop all callouts, leaving none active.
950 	 *
951 	 * We should also pull NET_EPOCH_WAIT out of the FOREACH(peer) loops, but the
952 	 * performance impact is acceptable for the time being. */
953 	ck_pr_store_bool(&peer->p_enabled, false);
954 	NET_EPOCH_WAIT();
955 	ck_pr_store_bool(&peer->p_need_another_keepalive, false);
956 
957 	callout_stop(&peer->p_new_handshake);
958 	callout_stop(&peer->p_send_keepalive);
959 	callout_stop(&peer->p_retry_handshake);
960 	callout_stop(&peer->p_persistent_keepalive);
961 	callout_stop(&peer->p_zero_key_material);
962 }
963 
964 static void
965 wg_timers_set_persistent_keepalive(struct wg_peer *peer, uint16_t interval)
966 {
967 	struct epoch_tracker et;
968 	if (interval != peer->p_persistent_keepalive_interval) {
969 		ck_pr_store_16(&peer->p_persistent_keepalive_interval, interval);
970 		NET_EPOCH_ENTER(et);
971 		if (ck_pr_load_bool(&peer->p_enabled))
972 			wg_timers_run_persistent_keepalive(peer);
973 		NET_EPOCH_EXIT(et);
974 	}
975 }
976 
977 static void
978 wg_timers_get_last_handshake(struct wg_peer *peer, struct wg_timespec64 *time)
979 {
980 	mtx_lock(&peer->p_handshake_mtx);
981 	time->tv_sec = peer->p_handshake_complete.tv_sec;
982 	time->tv_nsec = peer->p_handshake_complete.tv_nsec;
983 	mtx_unlock(&peer->p_handshake_mtx);
984 }
985 
986 static void
987 wg_timers_event_data_sent(struct wg_peer *peer)
988 {
989 	struct epoch_tracker et;
990 	NET_EPOCH_ENTER(et);
991 	if (ck_pr_load_bool(&peer->p_enabled) && !callout_pending(&peer->p_new_handshake))
992 		callout_reset(&peer->p_new_handshake, MSEC_2_TICKS(
993 		    NEW_HANDSHAKE_TIMEOUT * 1000 +
994 		    arc4random_uniform(REKEY_TIMEOUT_JITTER)),
995 		    wg_timers_run_new_handshake, peer);
996 	NET_EPOCH_EXIT(et);
997 }
998 
999 static void
1000 wg_timers_event_data_received(struct wg_peer *peer)
1001 {
1002 	struct epoch_tracker et;
1003 	NET_EPOCH_ENTER(et);
1004 	if (ck_pr_load_bool(&peer->p_enabled)) {
1005 		if (!callout_pending(&peer->p_send_keepalive))
1006 			callout_reset(&peer->p_send_keepalive,
1007 			    MSEC_2_TICKS(KEEPALIVE_TIMEOUT * 1000),
1008 			    wg_timers_run_send_keepalive, peer);
1009 		else
1010 			ck_pr_store_bool(&peer->p_need_another_keepalive, true);
1011 	}
1012 	NET_EPOCH_EXIT(et);
1013 }
1014 
1015 static void
1016 wg_timers_event_any_authenticated_packet_sent(struct wg_peer *peer)
1017 {
1018 	callout_stop(&peer->p_send_keepalive);
1019 }
1020 
1021 static void
1022 wg_timers_event_any_authenticated_packet_received(struct wg_peer *peer)
1023 {
1024 	callout_stop(&peer->p_new_handshake);
1025 }
1026 
1027 static void
1028 wg_timers_event_any_authenticated_packet_traversal(struct wg_peer *peer)
1029 {
1030 	struct epoch_tracker et;
1031 	uint16_t interval;
1032 	NET_EPOCH_ENTER(et);
1033 	interval = ck_pr_load_16(&peer->p_persistent_keepalive_interval);
1034 	if (ck_pr_load_bool(&peer->p_enabled) && interval > 0)
1035 		callout_reset(&peer->p_persistent_keepalive,
1036 		     MSEC_2_TICKS(interval * 1000),
1037 		     wg_timers_run_persistent_keepalive, peer);
1038 	NET_EPOCH_EXIT(et);
1039 }
1040 
1041 static void
1042 wg_timers_event_handshake_initiated(struct wg_peer *peer)
1043 {
1044 	struct epoch_tracker et;
1045 	NET_EPOCH_ENTER(et);
1046 	if (ck_pr_load_bool(&peer->p_enabled))
1047 		callout_reset(&peer->p_retry_handshake, MSEC_2_TICKS(
1048 		    REKEY_TIMEOUT * 1000 +
1049 		    arc4random_uniform(REKEY_TIMEOUT_JITTER)),
1050 		    wg_timers_run_retry_handshake, peer);
1051 	NET_EPOCH_EXIT(et);
1052 }
1053 
1054 static void
1055 wg_timers_event_handshake_complete(struct wg_peer *peer)
1056 {
1057 	struct epoch_tracker et;
1058 	NET_EPOCH_ENTER(et);
1059 	if (ck_pr_load_bool(&peer->p_enabled)) {
1060 		mtx_lock(&peer->p_handshake_mtx);
1061 		callout_stop(&peer->p_retry_handshake);
1062 		peer->p_handshake_retries = 0;
1063 		getnanotime(&peer->p_handshake_complete);
1064 		mtx_unlock(&peer->p_handshake_mtx);
1065 		wg_timers_run_send_keepalive(peer);
1066 	}
1067 	NET_EPOCH_EXIT(et);
1068 }
1069 
1070 static void
1071 wg_timers_event_session_derived(struct wg_peer *peer)
1072 {
1073 	struct epoch_tracker et;
1074 	NET_EPOCH_ENTER(et);
1075 	if (ck_pr_load_bool(&peer->p_enabled))
1076 		callout_reset(&peer->p_zero_key_material,
1077 		    MSEC_2_TICKS(REJECT_AFTER_TIME * 3 * 1000),
1078 		    wg_timers_run_zero_key_material, peer);
1079 	NET_EPOCH_EXIT(et);
1080 }
1081 
1082 static void
1083 wg_timers_event_want_initiation(struct wg_peer *peer)
1084 {
1085 	struct epoch_tracker et;
1086 	NET_EPOCH_ENTER(et);
1087 	if (ck_pr_load_bool(&peer->p_enabled))
1088 		wg_timers_run_send_initiation(peer, false);
1089 	NET_EPOCH_EXIT(et);
1090 }
1091 
1092 static void
1093 wg_timers_run_send_initiation(struct wg_peer *peer, bool is_retry)
1094 {
1095 	if (!is_retry)
1096 		peer->p_handshake_retries = 0;
1097 	if (noise_remote_initiation_expired(peer->p_remote) == ETIMEDOUT)
1098 		wg_send_initiation(peer);
1099 }
1100 
1101 static void
1102 wg_timers_run_retry_handshake(void *_peer)
1103 {
1104 	struct epoch_tracker et;
1105 	struct wg_peer *peer = _peer;
1106 
1107 	mtx_lock(&peer->p_handshake_mtx);
1108 	if (peer->p_handshake_retries <= MAX_TIMER_HANDSHAKES) {
1109 		peer->p_handshake_retries++;
1110 		mtx_unlock(&peer->p_handshake_mtx);
1111 
1112 		DPRINTF(peer->p_sc, "Handshake for peer %" PRIu64 " did not complete "
1113 		    "after %d seconds, retrying (try %d)\n", peer->p_id,
1114 		    REKEY_TIMEOUT, peer->p_handshake_retries + 1);
1115 		wg_peer_clear_src(peer);
1116 		wg_timers_run_send_initiation(peer, true);
1117 	} else {
1118 		mtx_unlock(&peer->p_handshake_mtx);
1119 
1120 		DPRINTF(peer->p_sc, "Handshake for peer %" PRIu64 " did not complete "
1121 		    "after %d retries, giving up\n", peer->p_id,
1122 		    MAX_TIMER_HANDSHAKES + 2);
1123 
1124 		callout_stop(&peer->p_send_keepalive);
1125 		wg_queue_purge(&peer->p_stage_queue);
1126 		NET_EPOCH_ENTER(et);
1127 		if (ck_pr_load_bool(&peer->p_enabled) &&
1128 		    !callout_pending(&peer->p_zero_key_material))
1129 			callout_reset(&peer->p_zero_key_material,
1130 			    MSEC_2_TICKS(REJECT_AFTER_TIME * 3 * 1000),
1131 			    wg_timers_run_zero_key_material, peer);
1132 		NET_EPOCH_EXIT(et);
1133 	}
1134 }
1135 
1136 static void
1137 wg_timers_run_send_keepalive(void *_peer)
1138 {
1139 	struct epoch_tracker et;
1140 	struct wg_peer *peer = _peer;
1141 
1142 	wg_send_keepalive(peer);
1143 	NET_EPOCH_ENTER(et);
1144 	if (ck_pr_load_bool(&peer->p_enabled) &&
1145 	    ck_pr_load_bool(&peer->p_need_another_keepalive)) {
1146 		ck_pr_store_bool(&peer->p_need_another_keepalive, false);
1147 		callout_reset(&peer->p_send_keepalive,
1148 		    MSEC_2_TICKS(KEEPALIVE_TIMEOUT * 1000),
1149 		    wg_timers_run_send_keepalive, peer);
1150 	}
1151 	NET_EPOCH_EXIT(et);
1152 }
1153 
1154 static void
1155 wg_timers_run_new_handshake(void *_peer)
1156 {
1157 	struct wg_peer *peer = _peer;
1158 
1159 	DPRINTF(peer->p_sc, "Retrying handshake with peer %" PRIu64 " because we "
1160 	    "stopped hearing back after %d seconds\n",
1161 	    peer->p_id, NEW_HANDSHAKE_TIMEOUT);
1162 
1163 	wg_peer_clear_src(peer);
1164 	wg_timers_run_send_initiation(peer, false);
1165 }
1166 
1167 static void
1168 wg_timers_run_zero_key_material(void *_peer)
1169 {
1170 	struct wg_peer *peer = _peer;
1171 
1172 	DPRINTF(peer->p_sc, "Zeroing out keys for peer %" PRIu64 ", since we "
1173 	    "haven't received a new one in %d seconds\n",
1174 	    peer->p_id, REJECT_AFTER_TIME * 3);
1175 	noise_remote_keypairs_clear(peer->p_remote);
1176 }
1177 
1178 static void
1179 wg_timers_run_persistent_keepalive(void *_peer)
1180 {
1181 	struct wg_peer *peer = _peer;
1182 
1183 	if (ck_pr_load_16(&peer->p_persistent_keepalive_interval) > 0)
1184 		wg_send_keepalive(peer);
1185 }
1186 
1187 /* TODO Handshake */
1188 static void
1189 wg_peer_send_buf(struct wg_peer *peer, uint8_t *buf, size_t len)
1190 {
1191 	struct wg_endpoint endpoint;
1192 
1193 	counter_u64_add(peer->p_tx_bytes, len);
1194 	wg_timers_event_any_authenticated_packet_traversal(peer);
1195 	wg_timers_event_any_authenticated_packet_sent(peer);
1196 	wg_peer_get_endpoint(peer, &endpoint);
1197 	wg_send_buf(peer->p_sc, &endpoint, buf, len);
1198 }
1199 
1200 static void
1201 wg_send_initiation(struct wg_peer *peer)
1202 {
1203 	struct wg_pkt_initiation pkt;
1204 
1205 	if (noise_create_initiation(peer->p_remote, &pkt.s_idx, pkt.ue,
1206 	    pkt.es, pkt.ets) != 0)
1207 		return;
1208 
1209 	DPRINTF(peer->p_sc, "Sending handshake initiation to peer %" PRIu64 "\n", peer->p_id);
1210 
1211 	pkt.t = WG_PKT_INITIATION;
1212 	cookie_maker_mac(&peer->p_cookie, &pkt.m, &pkt,
1213 	    sizeof(pkt) - sizeof(pkt.m));
1214 	wg_peer_send_buf(peer, (uint8_t *)&pkt, sizeof(pkt));
1215 	wg_timers_event_handshake_initiated(peer);
1216 }
1217 
1218 static void
1219 wg_send_response(struct wg_peer *peer)
1220 {
1221 	struct wg_pkt_response pkt;
1222 
1223 	if (noise_create_response(peer->p_remote, &pkt.s_idx, &pkt.r_idx,
1224 	    pkt.ue, pkt.en) != 0)
1225 		return;
1226 
1227 	DPRINTF(peer->p_sc, "Sending handshake response to peer %" PRIu64 "\n", peer->p_id);
1228 
1229 	wg_timers_event_session_derived(peer);
1230 	pkt.t = WG_PKT_RESPONSE;
1231 	cookie_maker_mac(&peer->p_cookie, &pkt.m, &pkt,
1232 	     sizeof(pkt)-sizeof(pkt.m));
1233 	wg_peer_send_buf(peer, (uint8_t*)&pkt, sizeof(pkt));
1234 }
1235 
1236 static void
1237 wg_send_cookie(struct wg_softc *sc, struct cookie_macs *cm, uint32_t idx,
1238     struct wg_endpoint *e)
1239 {
1240 	struct wg_pkt_cookie	pkt;
1241 
1242 	DPRINTF(sc, "Sending cookie response for denied handshake message\n");
1243 
1244 	pkt.t = WG_PKT_COOKIE;
1245 	pkt.r_idx = idx;
1246 
1247 	cookie_checker_create_payload(&sc->sc_cookie, cm, pkt.nonce,
1248 	    pkt.ec, &e->e_remote.r_sa);
1249 	wg_send_buf(sc, e, (uint8_t *)&pkt, sizeof(pkt));
1250 }
1251 
1252 static void
1253 wg_send_keepalive(struct wg_peer *peer)
1254 {
1255 	struct wg_packet *pkt;
1256 	struct mbuf *m;
1257 
1258 	if (wg_queue_len(&peer->p_stage_queue) > 0)
1259 		goto send;
1260 	if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
1261 		return;
1262 	if ((pkt = wg_packet_alloc(m)) == NULL) {
1263 		m_freem(m);
1264 		return;
1265 	}
1266 	wg_queue_push_staged(&peer->p_stage_queue, pkt);
1267 	DPRINTF(peer->p_sc, "Sending keepalive packet to peer %" PRIu64 "\n", peer->p_id);
1268 send:
1269 	wg_peer_send_staged(peer);
1270 }
1271 
1272 static void
1273 wg_handshake(struct wg_softc *sc, struct wg_packet *pkt)
1274 {
1275 	struct wg_pkt_initiation	*init;
1276 	struct wg_pkt_response		*resp;
1277 	struct wg_pkt_cookie		*cook;
1278 	struct wg_endpoint		*e;
1279 	struct wg_peer			*peer;
1280 	struct mbuf			*m;
1281 	struct noise_remote		*remote = NULL;
1282 	int				 res;
1283 	bool				 underload = false;
1284 	static sbintime_t		 wg_last_underload; /* sbinuptime */
1285 
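	/*
	 * Consider the interface under load once the handshake queue is 1/8
	 * full, and keep demanding valid cookies until UNDERLOAD_TIMEOUT
	 * seconds after the queue has drained.
	 */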
1286 	underload = wg_queue_len(&sc->sc_handshake_queue) >= MAX_QUEUED_HANDSHAKES / 8;
1287 	if (underload) {
1288 		wg_last_underload = getsbinuptime();
1289 	} else if (wg_last_underload) {
1290 		underload = wg_last_underload + UNDERLOAD_TIMEOUT * SBT_1S > getsbinuptime();
1291 		if (!underload)
1292 			wg_last_underload = 0;
1293 	}
1294 
1295 	m = pkt->p_mbuf;
1296 	e = &pkt->p_endpoint;
1297 
1298 	if ((pkt->p_mbuf = m = m_pullup(m, m->m_pkthdr.len)) == NULL)
1299 		goto error;
1300 
1301 	switch (*mtod(m, uint32_t *)) {
1302 	case WG_PKT_INITIATION:
1303 		init = mtod(m, struct wg_pkt_initiation *);
1304 
1305 		res = cookie_checker_validate_macs(&sc->sc_cookie, &init->m,
1306 				init, sizeof(*init) - sizeof(init->m),
1307 				underload, &e->e_remote.r_sa,
1308 				sc->sc_ifp->if_vnet);
1309 
1310 		if (res == EINVAL) {
1311 			DPRINTF(sc, "Invalid initiation MAC\n");
1312 			goto error;
1313 		} else if (res == ECONNREFUSED) {
1314 			DPRINTF(sc, "Handshake ratelimited\n");
1315 			goto error;
1316 		} else if (res == EAGAIN) {
1317 			wg_send_cookie(sc, &init->m, init->s_idx, e);
1318 			goto error;
1319 		} else if (res != 0) {
1320 			panic("unexpected response: %d\n", res);
1321 		}
1322 
1323 		if (noise_consume_initiation(sc->sc_local, &remote,
1324 		    init->s_idx, init->ue, init->es, init->ets) != 0) {
1325 			DPRINTF(sc, "Invalid handshake initiation\n");
1326 			goto error;
1327 		}
1328 
1329 		peer = noise_remote_arg(remote);
1330 
1331 		DPRINTF(sc, "Receiving handshake initiation from peer %" PRIu64 "\n", peer->p_id);
1332 
1333 		wg_peer_set_endpoint(peer, e);
1334 		wg_send_response(peer);
1335 		break;
1336 	case WG_PKT_RESPONSE:
1337 		resp = mtod(m, struct wg_pkt_response *);
1338 
1339 		res = cookie_checker_validate_macs(&sc->sc_cookie, &resp->m,
1340 				resp, sizeof(*resp) - sizeof(resp->m),
1341 				underload, &e->e_remote.r_sa,
1342 				sc->sc_ifp->if_vnet);
1343 
1344 		if (res == EINVAL) {
1345 			DPRINTF(sc, "Invalid response MAC\n");
1346 			goto error;
1347 		} else if (res == ECONNREFUSED) {
1348 			DPRINTF(sc, "Handshake ratelimited\n");
1349 			goto error;
1350 		} else if (res == EAGAIN) {
1351 			wg_send_cookie(sc, &resp->m, resp->s_idx, e);
1352 			goto error;
1353 		} else if (res != 0) {
1354 			panic("unexpected response: %d\n", res);
1355 		}
1356 
1357 		if (noise_consume_response(sc->sc_local, &remote,
1358 		    resp->s_idx, resp->r_idx, resp->ue, resp->en) != 0) {
1359 			DPRINTF(sc, "Invalid handshake response\n");
1360 			goto error;
1361 		}
1362 
1363 		peer = noise_remote_arg(remote);
1364 		DPRINTF(sc, "Receiving handshake response from peer %" PRIu64 "\n", peer->p_id);
1365 
1366 		wg_peer_set_endpoint(peer, e);
1367 		wg_timers_event_session_derived(peer);
1368 		wg_timers_event_handshake_complete(peer);
1369 		break;
1370 	case WG_PKT_COOKIE:
1371 		cook = mtod(m, struct wg_pkt_cookie *);
1372 
1373 		if ((remote = noise_remote_index(sc->sc_local, cook->r_idx)) == NULL) {
1374 			DPRINTF(sc, "Unknown cookie index\n");
1375 			goto error;
1376 		}
1377 
1378 		peer = noise_remote_arg(remote);
1379 
1380 		if (cookie_maker_consume_payload(&peer->p_cookie,
1381 		    cook->nonce, cook->ec) == 0) {
1382 			DPRINTF(sc, "Receiving cookie response\n");
1383 		} else {
1384 			DPRINTF(sc, "Could not decrypt cookie response\n");
1385 			goto error;
1386 		}
1387 
1388 		goto not_authenticated;
1389 	default:
1390 		panic("invalid packet in handshake queue");
1391 	}
1392 
1393 	wg_timers_event_any_authenticated_packet_received(peer);
1394 	wg_timers_event_any_authenticated_packet_traversal(peer);
1395 
1396 not_authenticated:
1397 	counter_u64_add(peer->p_rx_bytes, m->m_pkthdr.len);
1398 	if_inc_counter(sc->sc_ifp, IFCOUNTER_IPACKETS, 1);
1399 	if_inc_counter(sc->sc_ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
1400 error:
1401 	if (remote != NULL)
1402 		noise_remote_put(remote);
1403 	wg_packet_free(pkt);
1404 }
1405 
1406 static void
1407 wg_softc_handshake_receive(struct wg_softc *sc)
1408 {
1409 	struct wg_packet *pkt;
1410 	while ((pkt = wg_queue_dequeue_handshake(&sc->sc_handshake_queue)) != NULL)
1411 		wg_handshake(sc, pkt);
1412 }
1413 
1414 static void
1415 wg_mbuf_reset(struct mbuf *m)
1416 {
1417 
1418 	struct m_tag *t, *tmp;
1419 
1420 	/*
1421 	 * We want to reset the mbuf to a newly allocated state, containing
1422 	 * just the packet contents. Unfortunately FreeBSD doesn't seem to
1423 	 * offer this anywhere, so we have to make it up as we go. If we can
1424 	 * get this in kern/kern_mbuf.c, that would be best.
1425 	 *
1426 	 * Notice: this may break things unexpectedly but it is better to fail
1427 	 *         closed in the extreme case than leak information in every
1428 	 *         case.
1429 	 *
1430 	 * With that said, all this attempts to do is remove any extraneous
1431 	 * information that could be present.
1432 	 */
1433 
1434 	M_ASSERTPKTHDR(m);
1435 
1436 	m->m_flags &= ~(M_BCAST|M_MCAST|M_VLANTAG|M_PROMISC|M_PROTOFLAGS);
1437 
1438 	M_HASHTYPE_CLEAR(m);
1439 #ifdef NUMA
1440         m->m_pkthdr.numa_domain = M_NODOM;
1441 #endif
1442 	SLIST_FOREACH_SAFE(t, &m->m_pkthdr.tags, m_tag_link, tmp) {
1443 		if ((t->m_tag_id != 0 || t->m_tag_cookie != MTAG_WGLOOP) &&
1444 		    t->m_tag_id != PACKET_TAG_MACLABEL)
1445 			m_tag_delete(m, t);
1446 	}
1447 
1448 	KASSERT((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0,
1449 	    ("%s: mbuf %p has a send tag", __func__, m));
1450 
1451 	m->m_pkthdr.csum_flags = 0;
1452 	m->m_pkthdr.PH_per.sixtyfour[0] = 0;
1453 	m->m_pkthdr.PH_loc.sixtyfour[0] = 0;
1454 }
1455 
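/*
 * Return the number of zero bytes needed to pad the plaintext to a multiple of
 * WG_PKT_PADDING without growing the packet beyond its MTU.
 */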
1456 static inline unsigned int
1457 calculate_padding(struct wg_packet *pkt)
1458 {
1459 	unsigned int padded_size, last_unit = pkt->p_mbuf->m_pkthdr.len;
1460 
1461 	if (__predict_false(!pkt->p_mtu))
1462 		return ((last_unit + (WG_PKT_PADDING - 1)) & ~(WG_PKT_PADDING - 1)) - last_unit;
1463 
1464 	if (__predict_false(last_unit > pkt->p_mtu))
1465 		last_unit %= pkt->p_mtu;
1466 
1467 	padded_size = (last_unit + (WG_PKT_PADDING - 1)) & ~(WG_PKT_PADDING - 1);
1468 	if (pkt->p_mtu < padded_size)
1469 		padded_size = pkt->p_mtu;
1470 	return padded_size - last_unit;
1471 }
1472 
1473 static void
1474 wg_encrypt(struct wg_softc *sc, struct wg_packet *pkt)
1475 {
1476 	static const uint8_t	 padding[WG_PKT_PADDING] = { 0 };
1477 	struct wg_pkt_data	*data;
1478 	struct wg_peer		*peer;
1479 	struct noise_remote	*remote;
1480 	struct mbuf		*m;
1481 	uint32_t		 idx;
1482 	unsigned int		 padlen;
1483 	enum wg_ring_state	 state = WG_PACKET_DEAD;
1484 
1485 	remote = noise_keypair_remote(pkt->p_keypair);
1486 	peer = noise_remote_arg(remote);
1487 	m = pkt->p_mbuf;
1488 
1489 	/* Pad the packet */
1490 	padlen = calculate_padding(pkt);
1491 	if (padlen != 0 && !m_append(m, padlen, padding))
1492 		goto out;
1493 
1494 	/* Do encryption */
1495 	if (noise_keypair_encrypt(pkt->p_keypair, &idx, pkt->p_nonce, m) != 0)
1496 		goto out;
1497 
1498 	/* Put header into packet */
1499 	M_PREPEND(m, sizeof(struct wg_pkt_data), M_NOWAIT);
1500 	if (m == NULL)
1501 		goto out;
1502 	data = mtod(m, struct wg_pkt_data *);
1503 	data->t = WG_PKT_DATA;
1504 	data->r_idx = idx;
1505 	data->nonce = htole64(pkt->p_nonce);
1506 
1507 	wg_mbuf_reset(m);
1508 	state = WG_PACKET_CRYPTED;
1509 out:
1510 	pkt->p_mbuf = m;
1511 	wmb();
1512 	pkt->p_state = state;
1513 	GROUPTASK_ENQUEUE(&peer->p_send);
1514 	noise_remote_put(remote);
1515 }
1516 
1517 static void
1518 wg_decrypt(struct wg_softc *sc, struct wg_packet *pkt)
1519 {
1520 	struct wg_peer		*peer, *allowed_peer;
1521 	struct noise_remote	*remote;
1522 	struct mbuf		*m;
1523 	int			 len;
1524 	enum wg_ring_state	 state = WG_PACKET_DEAD;
1525 
1526 	remote = noise_keypair_remote(pkt->p_keypair);
1527 	peer = noise_remote_arg(remote);
1528 	m = pkt->p_mbuf;
1529 
1530 	/* Read nonce and then adjust to remove the header. */
1531 	pkt->p_nonce = le64toh(mtod(m, struct wg_pkt_data *)->nonce);
1532 	m_adj(m, sizeof(struct wg_pkt_data));
1533 
1534 	if (noise_keypair_decrypt(pkt->p_keypair, pkt->p_nonce, m) != 0)
1535 		goto out;
1536 
1537 	/* A packet with length 0 is a keepalive packet */
1538 	if (__predict_false(m->m_pkthdr.len == 0)) {
1539 		DPRINTF(sc, "Receiving keepalive packet from peer "
1540 		    "%" PRIu64 "\n", peer->p_id);
1541 		state = WG_PACKET_CRYPTED;
1542 		goto out;
1543 	}
1544 
1545 	/*
1546 	 * We can let the network stack handle the intricate validation of the
1547 	 * IP header; we just worry about the size and the version so that we
1548 	 * can read the source address in wg_aip_lookup.
1549 	 */
1550 
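	/* The inner IP length is also used below to trim off the sender's padding. */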
1551 	if (determine_af_and_pullup(&m, &pkt->p_af) == 0) {
1552 		if (pkt->p_af == AF_INET) {
1553 			struct ip *ip = mtod(m, struct ip *);
1554 			allowed_peer = wg_aip_lookup(sc, AF_INET, &ip->ip_src);
1555 			len = ntohs(ip->ip_len);
1556 			if (len >= sizeof(struct ip) && len < m->m_pkthdr.len)
1557 				m_adj(m, len - m->m_pkthdr.len);
1558 		} else if (pkt->p_af == AF_INET6) {
1559 			struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
1560 			allowed_peer = wg_aip_lookup(sc, AF_INET6, &ip6->ip6_src);
1561 			len = ntohs(ip6->ip6_plen) + sizeof(struct ip6_hdr);
1562 			if (len < m->m_pkthdr.len)
1563 				m_adj(m, len - m->m_pkthdr.len);
1564 		} else
1565 			panic("determine_af_and_pullup returned unexpected value");
1566 	} else {
1567 		DPRINTF(sc, "Packet is neither ipv4 nor ipv6 from peer %" PRIu64 "\n", peer->p_id);
1568 		goto out;
1569 	}
1570 
1571 	/* We only want to compare the address, not dereference, so drop the ref. */
1572 	if (allowed_peer != NULL)
1573 		noise_remote_put(allowed_peer->p_remote);
1574 
1575 	if (__predict_false(peer != allowed_peer)) {
1576 		DPRINTF(sc, "Packet has unallowed src IP from peer %" PRIu64 "\n", peer->p_id);
1577 		goto out;
1578 	}
1579 
1580 	wg_mbuf_reset(m);
1581 	state = WG_PACKET_CRYPTED;
1582 out:
1583 	pkt->p_mbuf = m;
1584 	wmb();
1585 	pkt->p_state = state;
1586 	GROUPTASK_ENQUEUE(&peer->p_recv);
1587 	noise_remote_put(remote);
1588 }
1589 
1590 static void
1591 wg_softc_decrypt(struct wg_softc *sc)
1592 {
1593 	struct wg_packet *pkt;
1594 
1595 	while ((pkt = wg_queue_dequeue_parallel(&sc->sc_decrypt_parallel)) != NULL)
1596 		wg_decrypt(sc, pkt);
1597 }
1598 
1599 static void
1600 wg_softc_encrypt(struct wg_softc *sc)
1601 {
1602 	struct wg_packet *pkt;
1603 
1604 	while ((pkt = wg_queue_dequeue_parallel(&sc->sc_encrypt_parallel)) != NULL)
1605 		wg_encrypt(sc, pkt);
1606 }
1607 
1608 static void
1609 wg_encrypt_dispatch(struct wg_softc *sc)
1610 {
1611 	/*
1612 	 * The update to encrypt_last_cpu is racy, such that we may
1613 	 * reschedule the task for the same CPU multiple times, but
1614 	 * the race doesn't really matter.
1615 	 */
1616 	u_int cpu = (sc->sc_encrypt_last_cpu + 1) % mp_ncpus;
1617 	sc->sc_encrypt_last_cpu = cpu;
1618 	GROUPTASK_ENQUEUE(&sc->sc_encrypt[cpu]);
1619 }
1620 
1621 static void
1622 wg_decrypt_dispatch(struct wg_softc *sc)
1623 {
1624 	u_int cpu = (sc->sc_decrypt_last_cpu + 1) % mp_ncpus;
1625 	sc->sc_decrypt_last_cpu = cpu;
1626 	GROUPTASK_ENQUEUE(&sc->sc_decrypt[cpu]);
1627 }
1628 
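/*
 * Per-peer serial task: drain the encrypt-serial queue in order and transmit
 * each packet to the peer's current endpoint.
 */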
1629 static void
1630 wg_deliver_out(struct wg_peer *peer)
1631 {
1632 	struct wg_endpoint	 endpoint;
1633 	struct wg_softc		*sc = peer->p_sc;
1634 	struct wg_packet	*pkt;
1635 	struct mbuf		*m;
1636 	int			 rc, len;
1637 
1638 	wg_peer_get_endpoint(peer, &endpoint);
1639 
1640 	while ((pkt = wg_queue_dequeue_serial(&peer->p_encrypt_serial)) != NULL) {
1641 		if (pkt->p_state != WG_PACKET_CRYPTED)
1642 			goto error;
1643 
1644 		m = pkt->p_mbuf;
1645 		pkt->p_mbuf = NULL;
1646 
1647 		len = m->m_pkthdr.len;
1648 
1649 		wg_timers_event_any_authenticated_packet_traversal(peer);
1650 		wg_timers_event_any_authenticated_packet_sent(peer);
1651 		rc = wg_send(sc, &endpoint, m);
1652 		if (rc == 0) {
1653 			if (len > (sizeof(struct wg_pkt_data) + NOISE_AUTHTAG_LEN))
1654 				wg_timers_event_data_sent(peer);
1655 			counter_u64_add(peer->p_tx_bytes, len);
1656 		} else if (rc == EADDRNOTAVAIL) {
1657 			wg_peer_clear_src(peer);
1658 			wg_peer_get_endpoint(peer, &endpoint);
1659 			goto error;
1660 		} else {
1661 			goto error;
1662 		}
1663 		wg_packet_free(pkt);
1664 		if (noise_keep_key_fresh_send(peer->p_remote))
1665 			wg_timers_event_want_initiation(peer);
1666 		continue;
1667 error:
1668 		if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1);
1669 		wg_packet_free(pkt);
1670 	}
1671 }
1672 
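/*
 * Per-peer serial task: drain the decrypt-serial queue in order, enforce the
 * nonce replay window, and hand the inner packets to the network stack.
 */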
1673 static void
1674 wg_deliver_in(struct wg_peer *peer)
1675 {
1676 	struct wg_softc		*sc = peer->p_sc;
1677 	struct ifnet		*ifp = sc->sc_ifp;
1678 	struct wg_packet	*pkt;
1679 	struct mbuf		*m;
1680 	struct epoch_tracker	 et;
1681 
1682 	while ((pkt = wg_queue_dequeue_serial(&peer->p_decrypt_serial)) != NULL) {
1683 		if (pkt->p_state != WG_PACKET_CRYPTED)
1684 			goto error;
1685 
1686 		m = pkt->p_mbuf;
1687 		if (noise_keypair_nonce_check(pkt->p_keypair, pkt->p_nonce) != 0)
1688 			goto error;
1689 
1690 		if (noise_keypair_received_with(pkt->p_keypair) == ECONNRESET)
1691 			wg_timers_event_handshake_complete(peer);
1692 
1693 		wg_timers_event_any_authenticated_packet_received(peer);
1694 		wg_timers_event_any_authenticated_packet_traversal(peer);
1695 		wg_peer_set_endpoint(peer, &pkt->p_endpoint);
1696 
1697 		counter_u64_add(peer->p_rx_bytes, m->m_pkthdr.len +
1698 		    sizeof(struct wg_pkt_data) + NOISE_AUTHTAG_LEN);
1699 		if_inc_counter(sc->sc_ifp, IFCOUNTER_IPACKETS, 1);
1700 		if_inc_counter(sc->sc_ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len +
1701 		    sizeof(struct wg_pkt_data) + NOISE_AUTHTAG_LEN);
1702 
1703 		if (m->m_pkthdr.len == 0)
1704 			goto done;
1705 
1706 		MPASS(pkt->p_af == AF_INET || pkt->p_af == AF_INET6);
1707 		pkt->p_mbuf = NULL;
1708 
1709 		m->m_pkthdr.rcvif = ifp;
1710 
1711 		NET_EPOCH_ENTER(et);
1712 		BPF_MTAP2_AF(ifp, m, pkt->p_af);
1713 
1714 		CURVNET_SET(ifp->if_vnet);
1715 		M_SETFIB(m, ifp->if_fib);
1716 		if (pkt->p_af == AF_INET)
1717 			netisr_dispatch(NETISR_IP, m);
1718 		if (pkt->p_af == AF_INET6)
1719 			netisr_dispatch(NETISR_IPV6, m);
1720 		CURVNET_RESTORE();
1721 		NET_EPOCH_EXIT(et);
1722 
1723 		wg_timers_event_data_received(peer);
1724 
1725 done:
1726 		if (noise_keep_key_fresh_recv(peer->p_remote))
1727 			wg_timers_event_want_initiation(peer);
1728 		wg_packet_free(pkt);
1729 		continue;
1730 error:
1731 		if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1732 		wg_packet_free(pkt);
1733 	}
1734 }
1735 
1736 static struct wg_packet *
1737 wg_packet_alloc(struct mbuf *m)
1738 {
1739 	struct wg_packet *pkt;
1740 
1741 	if ((pkt = uma_zalloc(wg_packet_zone, M_NOWAIT | M_ZERO)) == NULL)
1742 		return (NULL);
1743 	pkt->p_mbuf = m;
1744 	return (pkt);
1745 }
1746 
1747 static void
1748 wg_packet_free(struct wg_packet *pkt)
1749 {
1750 	if (pkt->p_keypair != NULL)
1751 		noise_keypair_put(pkt->p_keypair);
1752 	if (pkt->p_mbuf != NULL)
1753 		m_freem(pkt->p_mbuf);
1754 	uma_zfree(wg_packet_zone, pkt);
1755 }
1756 
1757 static void
1758 wg_queue_init(struct wg_queue *queue, const char *name)
1759 {
1760 	mtx_init(&queue->q_mtx, name, NULL, MTX_DEF);
1761 	STAILQ_INIT(&queue->q_queue);
1762 	queue->q_len = 0;
1763 }
1764 
1765 static void
1766 wg_queue_deinit(struct wg_queue *queue)
1767 {
1768 	wg_queue_purge(queue);
1769 	mtx_destroy(&queue->q_mtx);
1770 }
1771 
1772 static size_t
1773 wg_queue_len(struct wg_queue *queue)
1774 {
1775 	return (queue->q_len);
1776 }
1777 
1778 static int
1779 wg_queue_enqueue_handshake(struct wg_queue *hs, struct wg_packet *pkt)
1780 {
1781 	int ret = 0;
1782 	mtx_lock(&hs->q_mtx);
1783 	if (hs->q_len < MAX_QUEUED_HANDSHAKES) {
1784 		STAILQ_INSERT_TAIL(&hs->q_queue, pkt, p_parallel);
1785 		hs->q_len++;
1786 	} else {
1787 		ret = ENOBUFS;
1788 	}
1789 	mtx_unlock(&hs->q_mtx);
1790 	if (ret != 0)
1791 		wg_packet_free(pkt);
1792 	return (ret);
1793 }
1794 
1795 static struct wg_packet *
1796 wg_queue_dequeue_handshake(struct wg_queue *hs)
1797 {
1798 	struct wg_packet *pkt;
1799 	mtx_lock(&hs->q_mtx);
1800 	if ((pkt = STAILQ_FIRST(&hs->q_queue)) != NULL) {
1801 		STAILQ_REMOVE_HEAD(&hs->q_queue, p_parallel);
1802 		hs->q_len--;
1803 	}
1804 	mtx_unlock(&hs->q_mtx);
1805 	return (pkt);
1806 }
1807 
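/*
 * The staged queue holds outbound packets that cannot be encrypted yet
 * (typically because no valid keypair exists).  It is bounded at
 * MAX_STAGED_PKT; when full, the oldest staged packet is dropped to make
 * room for the new one.
 */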
1808 static void
1809 wg_queue_push_staged(struct wg_queue *staged, struct wg_packet *pkt)
1810 {
1811 	struct wg_packet *old = NULL;
1812 
1813 	mtx_lock(&staged->q_mtx);
1814 	if (staged->q_len >= MAX_STAGED_PKT) {
1815 		old = STAILQ_FIRST(&staged->q_queue);
1816 		STAILQ_REMOVE_HEAD(&staged->q_queue, p_parallel);
1817 		staged->q_len--;
1818 	}
1819 	STAILQ_INSERT_TAIL(&staged->q_queue, pkt, p_parallel);
1820 	staged->q_len++;
1821 	mtx_unlock(&staged->q_mtx);
1822 
1823 	if (old != NULL)
1824 		wg_packet_free(old);
1825 }
1826 
1827 static void
1828 wg_queue_enlist_staged(struct wg_queue *staged, struct wg_packet_list *list)
1829 {
1830 	struct wg_packet *pkt, *tpkt;
1831 	STAILQ_FOREACH_SAFE(pkt, list, p_parallel, tpkt)
1832 		wg_queue_push_staged(staged, pkt);
1833 }
1834 
1835 static void
1836 wg_queue_delist_staged(struct wg_queue *staged, struct wg_packet_list *list)
1837 {
1838 	STAILQ_INIT(list);
1839 	mtx_lock(&staged->q_mtx);
1840 	STAILQ_CONCAT(list, &staged->q_queue);
1841 	staged->q_len = 0;
1842 	mtx_unlock(&staged->q_mtx);
1843 }
1844 
1845 static void
1846 wg_queue_purge(struct wg_queue *staged)
1847 {
1848 	struct wg_packet_list list;
1849 	struct wg_packet *pkt, *tpkt;
1850 	wg_queue_delist_staged(staged, &list);
1851 	STAILQ_FOREACH_SAFE(pkt, &list, p_parallel, tpkt)
1852 		wg_packet_free(pkt);
1853 }
1854 
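/*
 * Packets bound for the crypto workers are placed on two queues at once:
 * the peer's serial queue, which preserves per-peer ordering for delivery,
 * and the softc-wide parallel queue, which lets the encryption/decryption
 * work be spread across CPUs.  Both are bounded at MAX_QUEUED_PKT.  If the
 * parallel enqueue fails after the serial enqueue succeeded, the packet is
 * marked WG_PACKET_DEAD and is reaped by the serial consumer.
 */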
1855 static int
1856 wg_queue_both(struct wg_queue *parallel, struct wg_queue *serial, struct wg_packet *pkt)
1857 {
1858 	pkt->p_state = WG_PACKET_UNCRYPTED;
1859 
1860 	mtx_lock(&serial->q_mtx);
1861 	if (serial->q_len < MAX_QUEUED_PKT) {
1862 		serial->q_len++;
1863 		STAILQ_INSERT_TAIL(&serial->q_queue, pkt, p_serial);
1864 	} else {
1865 		mtx_unlock(&serial->q_mtx);
1866 		wg_packet_free(pkt);
1867 		return (ENOBUFS);
1868 	}
1869 	mtx_unlock(&serial->q_mtx);
1870 
1871 	mtx_lock(&parallel->q_mtx);
1872 	if (parallel->q_len < MAX_QUEUED_PKT) {
1873 		parallel->q_len++;
1874 		STAILQ_INSERT_TAIL(&parallel->q_queue, pkt, p_parallel);
1875 	} else {
1876 		mtx_unlock(&parallel->q_mtx);
1877 		pkt->p_state = WG_PACKET_DEAD;
1878 		return (ENOBUFS);
1879 	}
1880 	mtx_unlock(&parallel->q_mtx);
1881 
1882 	return (0);
1883 }
1884 
1885 static struct wg_packet *
1886 wg_queue_dequeue_serial(struct wg_queue *serial)
1887 {
1888 	struct wg_packet *pkt = NULL;
1889 	mtx_lock(&serial->q_mtx);
1890 	if (serial->q_len > 0 && STAILQ_FIRST(&serial->q_queue)->p_state != WG_PACKET_UNCRYPTED) {
1891 		serial->q_len--;
1892 		pkt = STAILQ_FIRST(&serial->q_queue);
1893 		STAILQ_REMOVE_HEAD(&serial->q_queue, p_serial);
1894 	}
1895 	mtx_unlock(&serial->q_mtx);
1896 	return (pkt);
1897 }
1898 
1899 static struct wg_packet *
1900 wg_queue_dequeue_parallel(struct wg_queue *parallel)
1901 {
1902 	struct wg_packet *pkt = NULL;
1903 	mtx_lock(&parallel->q_mtx);
1904 	if (parallel->q_len > 0) {
1905 		parallel->q_len--;
1906 		pkt = STAILQ_FIRST(&parallel->q_queue);
1907 		STAILQ_REMOVE_HEAD(&parallel->q_queue, p_parallel);
1908 	}
1909 	mtx_unlock(&parallel->q_mtx);
1910 	return (pkt);
1911 }
1912 
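/*
 * UDP tunnel input callback.  After defragmenting the mbuf and stripping
 * the UDP header, the first 32-bit word selects the path: initiation,
 * response and cookie messages (matched by exact length) are queued to the
 * global handshake queue and the handshake task is kicked, while data
 * messages are matched to a keypair by receiver index and queued to the
 * owning peer's decryption queues.  The remote and local addresses from
 * `sa' are recorded as the packet's endpoint so the peer can roam.
 */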
1913 static bool
1914 wg_input(struct mbuf *m, int offset, struct inpcb *inpcb,
1915     const struct sockaddr *sa, void *_sc)
1916 {
1917 #ifdef INET
1918 	const struct sockaddr_in	*sin;
1919 #endif
1920 #ifdef INET6
1921 	const struct sockaddr_in6	*sin6;
1922 #endif
1923 	struct noise_remote		*remote;
1924 	struct wg_pkt_data		*data;
1925 	struct wg_packet		*pkt;
1926 	struct wg_peer			*peer;
1927 	struct wg_softc			*sc = _sc;
1928 	struct mbuf			*defragged;
1929 
1930 	defragged = m_defrag(m, M_NOWAIT);
1931 	if (defragged)
1932 		m = defragged;
1933 	m = m_unshare(m, M_NOWAIT);
1934 	if (!m) {
1935 		if_inc_counter(sc->sc_ifp, IFCOUNTER_IQDROPS, 1);
1936 		return true;
1937 	}
1938 
1939 	/* Caller provided us with `sa`, no need for this header. */
1940 	m_adj(m, offset + sizeof(struct udphdr));
1941 
1942 	/* Pullup enough to read packet type */
1943 	if ((m = m_pullup(m, sizeof(uint32_t))) == NULL) {
1944 		if_inc_counter(sc->sc_ifp, IFCOUNTER_IQDROPS, 1);
1945 		return true;
1946 	}
1947 
1948 	if ((pkt = wg_packet_alloc(m)) == NULL) {
1949 		if_inc_counter(sc->sc_ifp, IFCOUNTER_IQDROPS, 1);
1950 		m_freem(m);
1951 		return true;
1952 	}
1953 
1954 	/* Save send/recv address and port for later. */
1955 	switch (sa->sa_family) {
1956 #ifdef INET
1957 	case AF_INET:
1958 		sin = (const struct sockaddr_in *)sa;
1959 		pkt->p_endpoint.e_remote.r_sin = sin[0];
1960 		pkt->p_endpoint.e_local.l_in = sin[1].sin_addr;
1961 		break;
1962 #endif
1963 #ifdef INET6
1964 	case AF_INET6:
1965 		sin6 = (const struct sockaddr_in6 *)sa;
1966 		pkt->p_endpoint.e_remote.r_sin6 = sin6[0];
1967 		pkt->p_endpoint.e_local.l_in6 = sin6[1].sin6_addr;
1968 		break;
1969 #endif
1970 	default:
1971 		goto error;
1972 	}
1973 
1974 	if ((m->m_pkthdr.len == sizeof(struct wg_pkt_initiation) &&
1975 		*mtod(m, uint32_t *) == WG_PKT_INITIATION) ||
1976 	    (m->m_pkthdr.len == sizeof(struct wg_pkt_response) &&
1977 		*mtod(m, uint32_t *) == WG_PKT_RESPONSE) ||
1978 	    (m->m_pkthdr.len == sizeof(struct wg_pkt_cookie) &&
1979 		*mtod(m, uint32_t *) == WG_PKT_COOKIE)) {
1980 
1981 		if (wg_queue_enqueue_handshake(&sc->sc_handshake_queue, pkt) != 0) {
1982 			if_inc_counter(sc->sc_ifp, IFCOUNTER_IQDROPS, 1);
1983 			DPRINTF(sc, "Dropping handshake packet\n");
1984 		}
1985 		GROUPTASK_ENQUEUE(&sc->sc_handshake);
1986 	} else if (m->m_pkthdr.len >= sizeof(struct wg_pkt_data) +
1987 	    NOISE_AUTHTAG_LEN && *mtod(m, uint32_t *) == WG_PKT_DATA) {
1988 
1989 		/* Pullup whole header to read r_idx below. */
1990 		if ((pkt->p_mbuf = m_pullup(m, sizeof(struct wg_pkt_data))) == NULL)
1991 			goto error;
1992 
1993 		data = mtod(pkt->p_mbuf, struct wg_pkt_data *);
1994 		if ((pkt->p_keypair = noise_keypair_lookup(sc->sc_local, data->r_idx)) == NULL)
1995 			goto error;
1996 
1997 		remote = noise_keypair_remote(pkt->p_keypair);
1998 		peer = noise_remote_arg(remote);
1999 		if (wg_queue_both(&sc->sc_decrypt_parallel, &peer->p_decrypt_serial, pkt) != 0)
2000 			if_inc_counter(sc->sc_ifp, IFCOUNTER_IQDROPS, 1);
2001 		wg_decrypt_dispatch(sc);
2002 		noise_remote_put(remote);
2003 	} else {
2004 		goto error;
2005 	}
2006 	return true;
2007 error:
2008 	if_inc_counter(sc->sc_ifp, IFCOUNTER_IERRORS, 1);
2009 	wg_packet_free(pkt);
2010 	return true;
2011 }
2012 
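/*
 * Drain the peer's staged queue and hand the packets to the encryption
 * workers.  A nonce is reserved for every packet under the current keypair
 * before any of them are queued, so a nonce allocation failure leaves the
 * whole batch staged.  On failure the packets are put back on the staged
 * queue and a new handshake is requested.
 */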
2013 static void
2014 wg_peer_send_staged(struct wg_peer *peer)
2015 {
2016 	struct wg_packet_list	 list;
2017 	struct noise_keypair	*keypair;
2018 	struct wg_packet	*pkt, *tpkt;
2019 	struct wg_softc		*sc = peer->p_sc;
2020 
2021 	wg_queue_delist_staged(&peer->p_stage_queue, &list);
2022 
2023 	if (STAILQ_EMPTY(&list))
2024 		return;
2025 
2026 	if ((keypair = noise_keypair_current(peer->p_remote)) == NULL)
2027 		goto error;
2028 
2029 	STAILQ_FOREACH(pkt, &list, p_parallel) {
2030 		if (noise_keypair_nonce_next(keypair, &pkt->p_nonce) != 0)
2031 			goto error_keypair;
2032 	}
2033 	STAILQ_FOREACH_SAFE(pkt, &list, p_parallel, tpkt) {
2034 		pkt->p_keypair = noise_keypair_ref(keypair);
2035 		if (wg_queue_both(&sc->sc_encrypt_parallel, &peer->p_encrypt_serial, pkt) != 0)
2036 			if_inc_counter(sc->sc_ifp, IFCOUNTER_OQDROPS, 1);
2037 	}
2038 	wg_encrypt_dispatch(sc);
2039 	noise_keypair_put(keypair);
2040 	return;
2041 
2042 error_keypair:
2043 	noise_keypair_put(keypair);
2044 error:
2045 	wg_queue_enlist_staged(&peer->p_stage_queue, &list);
2046 	wg_timers_event_want_initiation(peer);
2047 }
2048 
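/*
 * Error path for outbound packets: count the error and, for IPv4/IPv6,
 * bounce an ICMP/ICMPv6 unreachable back at the sender.  icmp_error() and
 * icmp6_error() consume the mbuf, so it is detached from the wg_packet
 * before that is freed.
 */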
2049 static inline void
2050 xmit_err(struct ifnet *ifp, struct mbuf *m, struct wg_packet *pkt, sa_family_t af)
2051 {
2052 	if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
2053 	switch (af) {
2054 #ifdef INET
2055 	case AF_INET:
2056 		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0);
2057 		if (pkt)
2058 			pkt->p_mbuf = NULL;
2059 		m = NULL;
2060 		break;
2061 #endif
2062 #ifdef INET6
2063 	case AF_INET6:
2064 		icmp6_error(m, ICMP6_DST_UNREACH, 0, 0);
2065 		if (pkt)
2066 			pkt->p_mbuf = NULL;
2067 		m = NULL;
2068 		break;
2069 #endif
2070 	}
2071 	if (pkt)
2072 		wg_packet_free(pkt);
2073 	else if (m)
2074 		m_freem(m);
2075 }
2076 
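/*
 * Common transmit path.  The destination address selects the peer through
 * the allowed-IPs lookup, if_tunnel_check_nesting() guards against the
 * tunnel being routed over itself, and the packet is pushed onto the
 * peer's staged queue and flushed with wg_peer_send_staged().  ENOKEY is
 * returned when no peer claims the destination and EHOSTUNREACH when the
 * peer has no known endpoint.
 */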
2077 static int
2078 wg_xmit(struct ifnet *ifp, struct mbuf *m, sa_family_t af, uint32_t mtu)
2079 {
2080 	struct wg_packet	*pkt = NULL;
2081 	struct wg_softc		*sc = ifp->if_softc;
2082 	struct wg_peer		*peer;
2083 	int			 rc = 0;
2084 	sa_family_t		 peer_af;
2085 
2086 	/* Work around lifetime issue in the ipv6 mld code. */
2087 	if (__predict_false((ifp->if_flags & IFF_DYING) || !sc)) {
2088 		rc = ENXIO;
2089 		goto err_xmit;
2090 	}
2091 
2092 	if ((pkt = wg_packet_alloc(m)) == NULL) {
2093 		rc = ENOBUFS;
2094 		goto err_xmit;
2095 	}
2096 	pkt->p_mtu = mtu;
2097 	pkt->p_af = af;
2098 
2099 	if (af == AF_INET) {
2100 		peer = wg_aip_lookup(sc, AF_INET, &mtod(m, struct ip *)->ip_dst);
2101 	} else if (af == AF_INET6) {
2102 		peer = wg_aip_lookup(sc, AF_INET6, &mtod(m, struct ip6_hdr *)->ip6_dst);
2103 	} else {
2104 		rc = EAFNOSUPPORT;
2105 		goto err_xmit;
2106 	}
2107 
2108 	BPF_MTAP2_AF(ifp, m, pkt->p_af);
2109 
2110 	if (__predict_false(peer == NULL)) {
2111 		rc = ENOKEY;
2112 		goto err_xmit;
2113 	}
2114 
2115 	if (__predict_false(if_tunnel_check_nesting(ifp, m, MTAG_WGLOOP, MAX_LOOPS))) {
2116 		DPRINTF(sc, "Packet looped\n");
2117 		rc = ELOOP;
2118 		goto err_peer;
2119 	}
2120 
2121 	peer_af = peer->p_endpoint.e_remote.r_sa.sa_family;
2122 	if (__predict_false(peer_af != AF_INET && peer_af != AF_INET6)) {
2123 		DPRINTF(sc, "No valid endpoint has been configured or "
2124 			    "discovered for peer %" PRIu64 "\n", peer->p_id);
2125 		rc = EHOSTUNREACH;
2126 		goto err_peer;
2127 	}
2128 
2129 	wg_queue_push_staged(&peer->p_stage_queue, pkt);
2130 	wg_peer_send_staged(peer);
2131 	noise_remote_put(peer->p_remote);
2132 	return (0);
2133 
2134 err_peer:
2135 	noise_remote_put(peer->p_remote);
2136 err_xmit:
2137 	xmit_err(ifp, m, pkt, af);
2138 	return (rc);
2139 }
2140 
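/*
 * Classify an outbound mbuf as IPv4 or IPv6 from the IP version nibble and
 * pull up enough header for the destination-address lookup in wg_xmit().
 */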
2141 static inline int
2142 determine_af_and_pullup(struct mbuf **m, sa_family_t *af)
2143 {
2144 	u_char ipv;
2145 	if ((*m)->m_pkthdr.len >= sizeof(struct ip6_hdr))
2146 		*m = m_pullup(*m, sizeof(struct ip6_hdr));
2147 	else if ((*m)->m_pkthdr.len >= sizeof(struct ip))
2148 		*m = m_pullup(*m, sizeof(struct ip));
2149 	else
2150 		return (EAFNOSUPPORT);
2151 	if (*m == NULL)
2152 		return (ENOBUFS);
2153 	ipv = mtod(*m, struct ip *)->ip_v;
2154 	if (ipv == 4)
2155 		*af = AF_INET;
2156 	else if (ipv == 6 && (*m)->m_pkthdr.len >= sizeof(struct ip6_hdr))
2157 		*af = AF_INET6;
2158 	else
2159 		return (EAFNOSUPPORT);
2160 	return (0);
2161 }
2162 
2163 static int
2164 wg_transmit(struct ifnet *ifp, struct mbuf *m)
2165 {
2166 	sa_family_t af;
2167 	int ret;
2168 	struct mbuf *defragged;
2169 
2170 	defragged = m_defrag(m, M_NOWAIT);
2171 	if (defragged)
2172 		m = defragged;
2173 	m = m_unshare(m, M_NOWAIT);
2174 	if (!m) {
2175 		xmit_err(ifp, m, NULL, AF_UNSPEC);
2176 		return (ENOBUFS);
2177 	}
2178 
2179 	ret = determine_af_and_pullup(&m, &af);
2180 	if (ret) {
2181 		xmit_err(ifp, m, NULL, AF_UNSPEC);
2182 		return (ret);
2183 	}
2184 	return (wg_xmit(ifp, m, af, ifp->if_mtu));
2185 }
2186 
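/*
 * if_output handler.  An AF_UNSPEC destination carries the real address
 * family in sa_data; that family is cross-checked against the one parsed
 * from the packet itself before handing off to wg_xmit(), using the route
 * MTU when one is supplied.
 */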
2187 static int
2188 wg_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro)
2189 {
2190 	sa_family_t parsed_af;
2191 	uint32_t af, mtu;
2192 	int ret;
2193 	struct mbuf *defragged;
2194 
2195 	if (dst->sa_family == AF_UNSPEC)
2196 		memcpy(&af, dst->sa_data, sizeof(af));
2197 	else
2198 		af = dst->sa_family;
2199 	if (af == AF_UNSPEC) {
2200 		xmit_err(ifp, m, NULL, af);
2201 		return (EAFNOSUPPORT);
2202 	}
2203 
2204 	defragged = m_defrag(m, M_NOWAIT);
2205 	if (defragged)
2206 		m = defragged;
2207 	m = m_unshare(m, M_NOWAIT);
2208 	if (!m) {
2209 		xmit_err(ifp, m, NULL, AF_UNSPEC);
2210 		return (ENOBUFS);
2211 	}
2212 
2213 	ret = determine_af_and_pullup(&m, &parsed_af);
2214 	if (ret) {
2215 		xmit_err(ifp, m, NULL, AF_UNSPEC);
2216 		return (ret);
2217 	}
2218 	if (parsed_af != af) {
2219 		xmit_err(ifp, m, NULL, AF_UNSPEC);
2220 		return (EAFNOSUPPORT);
2221 	}
2222 	mtu = (ro != NULL && ro->ro_mtu > 0) ? ro->ro_mtu : ifp->if_mtu;
2223 	return (wg_xmit(ifp, m, parsed_af, mtu));
2224 }
2225 
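/*
 * Create or update a single peer from its nvlist description.  Handled
 * keys are "public-key" (required), "remove", "replace-allowedips",
 * "endpoint", "preshared-key", "persistent-keepalive-interval" and
 * "allowed-ips" (an array of nvlists carrying "cidr" plus "ipv4" or
 * "ipv6").  A request to add the interface's own public key is silently
 * ignored.  A newly allocated peer is only linked into sc_peers once the
 * whole nvlist has been applied successfully.
 */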
2226 static int
2227 wg_peer_add(struct wg_softc *sc, const nvlist_t *nvl)
2228 {
2229 	uint8_t			 public[WG_KEY_SIZE];
2230 	const void *pub_key, *preshared_key = NULL;
2231 	const struct sockaddr *endpoint;
2232 	int err;
2233 	size_t size;
2234 	struct noise_remote *remote;
2235 	struct wg_peer *peer = NULL;
2236 	bool need_insert = false;
2237 
2238 	sx_assert(&sc->sc_lock, SX_XLOCKED);
2239 
2240 	if (!nvlist_exists_binary(nvl, "public-key")) {
2241 		return (EINVAL);
2242 	}
2243 	pub_key = nvlist_get_binary(nvl, "public-key", &size);
2244 	if (size != WG_KEY_SIZE) {
2245 		return (EINVAL);
2246 	}
2247 	if (noise_local_keys(sc->sc_local, public, NULL) == 0 &&
2248 	    bcmp(public, pub_key, WG_KEY_SIZE) == 0) {
2249 		return (0); /* Silently ignored; not actually a failure. */
2250 	}
2251 	if ((remote = noise_remote_lookup(sc->sc_local, pub_key)) != NULL)
2252 		peer = noise_remote_arg(remote);
2253 	if (nvlist_exists_bool(nvl, "remove") &&
2254 		nvlist_get_bool(nvl, "remove")) {
2255 		if (remote != NULL) {
2256 			wg_peer_destroy(peer);
2257 			noise_remote_put(remote);
2258 		}
2259 		return (0);
2260 	}
2261 	if (nvlist_exists_bool(nvl, "replace-allowedips") &&
2262 		nvlist_get_bool(nvl, "replace-allowedips") &&
2263 	    peer != NULL) {
2264 
2265 		wg_aip_remove_all(sc, peer);
2266 	}
2267 	if (peer == NULL) {
2268 		peer = wg_peer_alloc(sc, pub_key);
2269 		need_insert = true;
2270 	}
2271 	if (nvlist_exists_binary(nvl, "endpoint")) {
2272 		endpoint = nvlist_get_binary(nvl, "endpoint", &size);
2273 		if (size > sizeof(peer->p_endpoint.e_remote)) {
2274 			err = EINVAL;
2275 			goto out;
2276 		}
2277 		memcpy(&peer->p_endpoint.e_remote, endpoint, size);
2278 	}
2279 	if (nvlist_exists_binary(nvl, "preshared-key")) {
2280 		preshared_key = nvlist_get_binary(nvl, "preshared-key", &size);
2281 		if (size != WG_KEY_SIZE) {
2282 			err = EINVAL;
2283 			goto out;
2284 		}
2285 		noise_remote_set_psk(peer->p_remote, preshared_key);
2286 	}
2287 	if (nvlist_exists_number(nvl, "persistent-keepalive-interval")) {
2288 		uint64_t pki = nvlist_get_number(nvl, "persistent-keepalive-interval");
2289 		if (pki > UINT16_MAX) {
2290 			err = EINVAL;
2291 			goto out;
2292 		}
2293 		wg_timers_set_persistent_keepalive(peer, pki);
2294 	}
2295 	if (nvlist_exists_nvlist_array(nvl, "allowed-ips")) {
2296 		const void *addr;
2297 		uint64_t cidr;
2298 		const nvlist_t * const * aipl;
2299 		size_t allowedip_count;
2300 
2301 		aipl = nvlist_get_nvlist_array(nvl, "allowed-ips", &allowedip_count);
2302 		for (size_t idx = 0; idx < allowedip_count; idx++) {
2303 			if (!nvlist_exists_number(aipl[idx], "cidr"))
2304 				continue;
2305 			cidr = nvlist_get_number(aipl[idx], "cidr");
2306 			if (nvlist_exists_binary(aipl[idx], "ipv4")) {
2307 				addr = nvlist_get_binary(aipl[idx], "ipv4", &size);
2308 				if (addr == NULL || cidr > 32 || size != sizeof(struct in_addr)) {
2309 					err = EINVAL;
2310 					goto out;
2311 				}
2312 				if ((err = wg_aip_add(sc, peer, AF_INET, addr, cidr)) != 0)
2313 					goto out;
2314 			} else if (nvlist_exists_binary(aipl[idx], "ipv6")) {
2315 				addr = nvlist_get_binary(aipl[idx], "ipv6", &size);
2316 				if (addr == NULL || cidr > 128 || size != sizeof(struct in6_addr)) {
2317 					err = EINVAL;
2318 					goto out;
2319 				}
2320 				if ((err = wg_aip_add(sc, peer, AF_INET6, addr, cidr)) != 0)
2321 					goto out;
2322 			} else {
2323 				continue;
2324 			}
2325 		}
2326 	}
2327 	if (need_insert) {
2328 		if ((err = noise_remote_enable(peer->p_remote)) != 0)
2329 			goto out;
2330 		TAILQ_INSERT_TAIL(&sc->sc_peers, peer, p_entry);
2331 		sc->sc_peers_num++;
2332 		if (sc->sc_ifp->if_link_state == LINK_STATE_UP)
2333 			wg_timers_enable(peer);
2334 	}
2335 	if (remote != NULL)
2336 		noise_remote_put(remote);
2337 	return (0);
2338 out:
2339 	if (need_insert) /* If we fail, only destroy if it was new. */
2340 		wg_peer_destroy(peer);
2341 	if (remote != NULL)
2342 		noise_remote_put(remote);
2343 	return (err);
2344 }
2345 
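/*
 * SIOCSWG handler.  The packed nvlist is copied in from userspace (its
 * size is capped), unpacked and applied under the exclusive softc lock:
 * "replace-peers", "listen-port", "private-key", "user-cookie" and the
 * "peers" array (each element handled by wg_peer_add()).  Installing a new
 * private key removes any peer holding the matching public key and updates
 * the cookie checker.  The packed buffer may contain key material, so it
 * is zeroed before being freed.
 */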
2346 static int
2347 wgc_set(struct wg_softc *sc, struct wg_data_io *wgd)
2348 {
2349 	uint8_t public[WG_KEY_SIZE], private[WG_KEY_SIZE];
2350 	struct ifnet *ifp;
2351 	void *nvlpacked;
2352 	nvlist_t *nvl;
2353 	ssize_t size;
2354 	int err;
2355 
2356 	ifp = sc->sc_ifp;
2357 	if (wgd->wgd_size == 0 || wgd->wgd_data == NULL)
2358 		return (EFAULT);
2359 
2360 	/* Can nvlists be streamed in? It's not nice to impose arbitrary limits
2361 	 * like that, but there needs to be _some_ limitation. */
2362 	if (wgd->wgd_size >= UINT32_MAX / 2)
2363 		return (E2BIG);
2364 
2365 	nvlpacked = malloc(wgd->wgd_size, M_TEMP, M_WAITOK | M_ZERO);
2366 
2367 	err = copyin(wgd->wgd_data, nvlpacked, wgd->wgd_size);
2368 	if (err)
2369 		goto out;
2370 	nvl = nvlist_unpack(nvlpacked, wgd->wgd_size, 0);
2371 	if (nvl == NULL) {
2372 		err = EBADMSG;
2373 		goto out;
2374 	}
2375 	sx_xlock(&sc->sc_lock);
2376 	if (nvlist_exists_bool(nvl, "replace-peers") &&
2377 		nvlist_get_bool(nvl, "replace-peers"))
2378 		wg_peer_destroy_all(sc);
2379 	if (nvlist_exists_number(nvl, "listen-port")) {
2380 		uint64_t new_port = nvlist_get_number(nvl, "listen-port");
2381 		if (new_port > UINT16_MAX) {
2382 			err = EINVAL;
2383 			goto out_locked;
2384 		}
2385 		if (new_port != sc->sc_socket.so_port) {
2386 			if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) {
2387 				if ((err = wg_socket_init(sc, new_port)) != 0)
2388 					goto out_locked;
2389 			} else
2390 				sc->sc_socket.so_port = new_port;
2391 		}
2392 	}
2393 	if (nvlist_exists_binary(nvl, "private-key")) {
2394 		const void *key = nvlist_get_binary(nvl, "private-key", &size);
2395 		if (size != WG_KEY_SIZE) {
2396 			err = EINVAL;
2397 			goto out_locked;
2398 		}
2399 
2400 		if (noise_local_keys(sc->sc_local, NULL, private) != 0 ||
2401 		    timingsafe_bcmp(private, key, WG_KEY_SIZE) != 0) {
2402 			struct wg_peer *peer;
2403 
2404 			if (curve25519_generate_public(public, key)) {
2405 				/* Peer conflict: remove conflicting peer. */
2406 				struct noise_remote *remote;
2407 				if ((remote = noise_remote_lookup(sc->sc_local,
2408 				    public)) != NULL) {
2409 					peer = noise_remote_arg(remote);
2410 					wg_peer_destroy(peer);
2411 					noise_remote_put(remote);
2412 				}
2413 			}
2414 
2415 			/*
2416 			 * Set the private key and invalidate all existing
2417 			 * handshakes.
2418 			 */
2419 			/* Note: we might be removing the private key. */
2420 			noise_local_private(sc->sc_local, key);
2421 			if (noise_local_keys(sc->sc_local, NULL, NULL) == 0)
2422 				cookie_checker_update(&sc->sc_cookie, public);
2423 			else
2424 				cookie_checker_update(&sc->sc_cookie, NULL);
2425 		}
2426 	}
2427 	if (nvlist_exists_number(nvl, "user-cookie")) {
2428 		uint64_t user_cookie = nvlist_get_number(nvl, "user-cookie");
2429 		if (user_cookie > UINT32_MAX) {
2430 			err = EINVAL;
2431 			goto out_locked;
2432 		}
2433 		err = wg_socket_set_cookie(sc, user_cookie);
2434 		if (err)
2435 			goto out_locked;
2436 	}
2437 	if (nvlist_exists_nvlist_array(nvl, "peers")) {
2438 		size_t peercount;
2439 		const nvlist_t * const*nvl_peers;
2440 
2441 		nvl_peers = nvlist_get_nvlist_array(nvl, "peers", &peercount);
2442 		for (int i = 0; i < peercount; i++) {
2443 			err = wg_peer_add(sc, nvl_peers[i]);
2444 			if (err != 0)
2445 				goto out_locked;
2446 		}
2447 	}
2448 
2449 out_locked:
2450 	sx_xunlock(&sc->sc_lock);
2451 	nvlist_destroy(nvl);
2452 out:
2453 	explicit_bzero(nvlpacked, wgd->wgd_size);
2454 	free(nvlpacked, M_TEMP);
2455 	return (err);
2456 }
2457 
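/*
 * SIOCGWG handler.  Builds an nvlist snapshot of the interface (listen
 * port, user cookie, keys) and of every peer (keys, endpoint, last
 * handshake time, keepalive interval, byte counters and allowed IPs),
 * packs it and copies it out.  Private and preshared keys are only
 * included for privileged callers.  A caller passing wgd_size == 0 just
 * gets the required buffer size back.
 */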
2458 static int
2459 wgc_get(struct wg_softc *sc, struct wg_data_io *wgd)
2460 {
2461 	uint8_t public_key[WG_KEY_SIZE] = { 0 };
2462 	uint8_t private_key[WG_KEY_SIZE] = { 0 };
2463 	uint8_t preshared_key[NOISE_SYMMETRIC_KEY_LEN] = { 0 };
2464 	nvlist_t *nvl, *nvl_peer, *nvl_aip, **nvl_peers, **nvl_aips;
2465 	size_t size, peer_count, aip_count, i, j;
2466 	struct wg_timespec64 ts64;
2467 	struct wg_peer *peer;
2468 	struct wg_aip *aip;
2469 	void *packed;
2470 	int err = 0;
2471 
2472 	nvl = nvlist_create(0);
2473 	if (!nvl)
2474 		return (ENOMEM);
2475 
2476 	sx_slock(&sc->sc_lock);
2477 
2478 	if (sc->sc_socket.so_port != 0)
2479 		nvlist_add_number(nvl, "listen-port", sc->sc_socket.so_port);
2480 	if (sc->sc_socket.so_user_cookie != 0)
2481 		nvlist_add_number(nvl, "user-cookie", sc->sc_socket.so_user_cookie);
2482 	if (noise_local_keys(sc->sc_local, public_key, private_key) == 0) {
2483 		nvlist_add_binary(nvl, "public-key", public_key, WG_KEY_SIZE);
2484 		if (wgc_privileged(sc))
2485 			nvlist_add_binary(nvl, "private-key", private_key, WG_KEY_SIZE);
2486 		explicit_bzero(private_key, sizeof(private_key));
2487 	}
2488 	peer_count = sc->sc_peers_num;
2489 	if (peer_count) {
2490 		nvl_peers = mallocarray(peer_count, sizeof(void *), M_NVLIST, M_WAITOK | M_ZERO);
2491 		i = 0;
2492 		TAILQ_FOREACH(peer, &sc->sc_peers, p_entry) {
2493 			if (i >= peer_count)
2494 				panic("peers changed from under us");
2495 
2496 			nvl_peers[i++] = nvl_peer = nvlist_create(0);
2497 			if (!nvl_peer) {
2498 				err = ENOMEM;
2499 				goto err_peer;
2500 			}
2501 
2502 			(void)noise_remote_keys(peer->p_remote, public_key, preshared_key);
2503 			nvlist_add_binary(nvl_peer, "public-key", public_key, sizeof(public_key));
2504 			if (wgc_privileged(sc))
2505 				nvlist_add_binary(nvl_peer, "preshared-key", preshared_key, sizeof(preshared_key));
2506 			explicit_bzero(preshared_key, sizeof(preshared_key));
2507 			if (peer->p_endpoint.e_remote.r_sa.sa_family == AF_INET)
2508 				nvlist_add_binary(nvl_peer, "endpoint", &peer->p_endpoint.e_remote, sizeof(struct sockaddr_in));
2509 			else if (peer->p_endpoint.e_remote.r_sa.sa_family == AF_INET6)
2510 				nvlist_add_binary(nvl_peer, "endpoint", &peer->p_endpoint.e_remote, sizeof(struct sockaddr_in6));
2511 			wg_timers_get_last_handshake(peer, &ts64);
2512 			nvlist_add_binary(nvl_peer, "last-handshake-time", &ts64, sizeof(ts64));
2513 			nvlist_add_number(nvl_peer, "persistent-keepalive-interval", peer->p_persistent_keepalive_interval);
2514 			nvlist_add_number(nvl_peer, "rx-bytes", counter_u64_fetch(peer->p_rx_bytes));
2515 			nvlist_add_number(nvl_peer, "tx-bytes", counter_u64_fetch(peer->p_tx_bytes));
2516 
2517 			aip_count = peer->p_aips_num;
2518 			if (aip_count) {
2519 				nvl_aips = mallocarray(aip_count, sizeof(void *), M_NVLIST, M_WAITOK | M_ZERO);
2520 				j = 0;
2521 				LIST_FOREACH(aip, &peer->p_aips, a_entry) {
2522 					if (j >= aip_count)
2523 						panic("aips changed from under us");
2524 
2525 					nvl_aips[j++] = nvl_aip = nvlist_create(0);
2526 					if (!nvl_aip) {
2527 						err = ENOMEM;
2528 						goto err_aip;
2529 					}
2530 					if (aip->a_af == AF_INET) {
2531 						nvlist_add_binary(nvl_aip, "ipv4", &aip->a_addr.in, sizeof(aip->a_addr.in));
2532 						nvlist_add_number(nvl_aip, "cidr", bitcount32(aip->a_mask.ip));
2533 					}
2534 #ifdef INET6
2535 					else if (aip->a_af == AF_INET6) {
2536 						nvlist_add_binary(nvl_aip, "ipv6", &aip->a_addr.in6, sizeof(aip->a_addr.in6));
2537 						nvlist_add_number(nvl_aip, "cidr", in6_mask2len(&aip->a_mask.in6, NULL));
2538 					}
2539 #endif
2540 				}
2541 				nvlist_add_nvlist_array(nvl_peer, "allowed-ips", (const nvlist_t *const *)nvl_aips, aip_count);
2542 			err_aip:
2543 				for (j = 0; j < aip_count; ++j)
2544 					nvlist_destroy(nvl_aips[j]);
2545 				free(nvl_aips, M_NVLIST);
2546 				if (err)
2547 					goto err_peer;
2548 			}
2549 		}
2550 		nvlist_add_nvlist_array(nvl, "peers", (const nvlist_t * const *)nvl_peers, peer_count);
2551 	err_peer:
2552 		for (i = 0; i < peer_count; ++i)
2553 			nvlist_destroy(nvl_peers[i]);
2554 		free(nvl_peers, M_NVLIST);
2555 		if (err) {
2556 			sx_sunlock(&sc->sc_lock);
2557 			goto err;
2558 		}
2559 	}
2560 	sx_sunlock(&sc->sc_lock);
2561 	packed = nvlist_pack(nvl, &size);
2562 	if (!packed) {
2563 		err = ENOMEM;
2564 		goto err;
2565 	}
2566 	if (!wgd->wgd_size) {
2567 		wgd->wgd_size = size;
2568 		goto out;
2569 	}
2570 	if (wgd->wgd_size < size) {
2571 		err = ENOSPC;
2572 		goto out;
2573 	}
2574 	err = copyout(packed, wgd->wgd_data, size);
2575 	wgd->wgd_size = size;
2576 
2577 out:
2578 	explicit_bzero(packed, size);
2579 	free(packed, M_NVLIST);
2580 err:
2581 	nvlist_destroy(nvl);
2582 	return (err);
2583 }
2584 
2585 static int
2586 wg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
2587 {
2588 	struct wg_data_io *wgd = (struct wg_data_io *)data;
2589 	struct ifreq *ifr = (struct ifreq *)data;
2590 	struct wg_softc *sc;
2591 	int ret = 0;
2592 
2593 	sx_slock(&wg_sx);
2594 	sc = ifp->if_softc;
2595 	if (!sc) {
2596 		ret = ENXIO;
2597 		goto out;
2598 	}
2599 
2600 	switch (cmd) {
2601 	case SIOCSWG:
2602 		ret = priv_check(curthread, PRIV_NET_WG);
2603 		if (ret == 0)
2604 			ret = wgc_set(sc, wgd);
2605 		break;
2606 	case SIOCGWG:
2607 		ret = wgc_get(sc, wgd);
2608 		break;
2609 	/* Interface IOCTLs */
2610 	case SIOCSIFADDR:
2611 		/*
2612 		 * This differs from *BSD norms, but is more uniform with how
2613 		 * WireGuard behaves elsewhere.
2614 		 */
2615 		break;
2616 	case SIOCSIFFLAGS:
2617 		if (ifp->if_flags & IFF_UP)
2618 			ret = wg_up(sc);
2619 		else
2620 			wg_down(sc);
2621 		break;
2622 	case SIOCSIFMTU:
2623 		if (ifr->ifr_mtu <= 0 || ifr->ifr_mtu > MAX_MTU)
2624 			ret = EINVAL;
2625 		else
2626 			ifp->if_mtu = ifr->ifr_mtu;
2627 		break;
2628 	case SIOCADDMULTI:
2629 	case SIOCDELMULTI:
2630 		break;
2631 	case SIOCGTUNFIB:
2632 		ifr->ifr_fib = sc->sc_socket.so_fibnum;
2633 		break;
2634 	case SIOCSTUNFIB:
2635 		ret = priv_check(curthread, PRIV_NET_WG);
2636 		if (ret)
2637 			break;
2638 		ret = priv_check(curthread, PRIV_NET_SETIFFIB);
2639 		if (ret)
2640 			break;
2641 		sx_xlock(&sc->sc_lock);
2642 		ret = wg_socket_set_fibnum(sc, ifr->ifr_fib);
2643 		sx_xunlock(&sc->sc_lock);
2644 		break;
2645 	default:
2646 		ret = ENOTTY;
2647 	}
2648 
2649 out:
2650 	sx_sunlock(&wg_sx);
2651 	return (ret);
2652 }
2653 
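/*
 * Bring the interface up: initialize the UDP socket on the configured
 * port, enable every peer's timers and report link up.  A dying interface
 * (its jail is being removed) refuses with EBUSY; bringing up an interface
 * that is already running is a silent success.
 */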
2654 static int
2655 wg_up(struct wg_softc *sc)
2656 {
2657 	struct ifnet *ifp = sc->sc_ifp;
2658 	struct wg_peer *peer;
2659 	int rc = EBUSY;
2660 
2661 	sx_xlock(&sc->sc_lock);
2662 	/* Jail's being removed, no more wg_up(). */
2663 	if ((sc->sc_flags & WGF_DYING) != 0)
2664 		goto out;
2665 
2666 	/* Silent success if we're already running. */
2667 	rc = 0;
2668 	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2669 		goto out;
2670 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
2671 
2672 	rc = wg_socket_init(sc, sc->sc_socket.so_port);
2673 	if (rc == 0) {
2674 		TAILQ_FOREACH(peer, &sc->sc_peers, p_entry)
2675 			wg_timers_enable(peer);
2676 		if_link_state_change(sc->sc_ifp, LINK_STATE_UP);
2677 	} else {
2678 		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2679 		DPRINTF(sc, "Unable to initialize sockets: %d\n", rc);
2680 	}
2681 out:
2682 	sx_xunlock(&sc->sc_lock);
2683 	return (rc);
2684 }
2685 
2686 static void
2687 wg_down(struct wg_softc *sc)
2688 {
2689 	struct ifnet *ifp = sc->sc_ifp;
2690 	struct wg_peer *peer;
2691 
2692 	sx_xlock(&sc->sc_lock);
2693 	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
2694 		sx_xunlock(&sc->sc_lock);
2695 		return;
2696 	}
2697 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2698 
2699 	TAILQ_FOREACH(peer, &sc->sc_peers, p_entry) {
2700 		wg_queue_purge(&peer->p_stage_queue);
2701 		wg_timers_disable(peer);
2702 	}
2703 
2704 	wg_queue_purge(&sc->sc_handshake_queue);
2705 
2706 	TAILQ_FOREACH(peer, &sc->sc_peers, p_entry) {
2707 		noise_remote_handshake_clear(peer->p_remote);
2708 		noise_remote_keypairs_clear(peer->p_remote);
2709 	}
2710 
2711 	if_link_state_change(sc->sc_ifp, LINK_STATE_DOWN);
2712 	wg_socket_uninit(sc);
2713 
2714 	sx_xunlock(&sc->sc_lock);
2715 }
2716 
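/*
 * Clone creation: allocate the softc, the noise local state, the per-CPU
 * encrypt/decrypt grouptasks and the IPv4/IPv6 allowed-IPs radix heads,
 * record the creating thread's ucred and FIB for the socket code, then
 * attach the ifnet and register it on the global wg_list.
 */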
2717 static int
2718 wg_clone_create(struct if_clone *ifc, int unit, caddr_t params)
2719 {
2720 	struct wg_softc *sc;
2721 	struct ifnet *ifp;
2722 
2723 	sc = malloc(sizeof(*sc), M_WG, M_WAITOK | M_ZERO);
2724 
2725 	sc->sc_local = noise_local_alloc(sc);
2726 
2727 	sc->sc_encrypt = mallocarray(sizeof(struct grouptask), mp_ncpus, M_WG, M_WAITOK | M_ZERO);
2728 
2729 	sc->sc_decrypt = mallocarray(sizeof(struct grouptask), mp_ncpus, M_WG, M_WAITOK | M_ZERO);
2730 
2731 	if (!rn_inithead((void **)&sc->sc_aip4, offsetof(struct aip_addr, in) * NBBY))
2732 		goto free_decrypt;
2733 
2734 	if (!rn_inithead((void **)&sc->sc_aip6, offsetof(struct aip_addr, in6) * NBBY))
2735 		goto free_aip4;
2736 
2737 	atomic_add_int(&clone_count, 1);
2738 	ifp = sc->sc_ifp = if_alloc(IFT_WIREGUARD);
2739 
2740 	sc->sc_ucred = crhold(curthread->td_ucred);
2741 	sc->sc_socket.so_fibnum = curthread->td_proc->p_fibnum;
2742 	sc->sc_socket.so_port = 0;
2743 
2744 	TAILQ_INIT(&sc->sc_peers);
2745 	sc->sc_peers_num = 0;
2746 
2747 	cookie_checker_init(&sc->sc_cookie);
2748 
2749 	RADIX_NODE_HEAD_LOCK_INIT(sc->sc_aip4);
2750 	RADIX_NODE_HEAD_LOCK_INIT(sc->sc_aip6);
2751 
2752 	GROUPTASK_INIT(&sc->sc_handshake, 0, (gtask_fn_t *)wg_softc_handshake_receive, sc);
2753 	taskqgroup_attach(qgroup_wg_tqg, &sc->sc_handshake, sc, NULL, NULL, "wg tx initiation");
2754 	wg_queue_init(&sc->sc_handshake_queue, "hsq");
2755 
2756 	for (int i = 0; i < mp_ncpus; i++) {
2757 		GROUPTASK_INIT(&sc->sc_encrypt[i], 0,
2758 		     (gtask_fn_t *)wg_softc_encrypt, sc);
2759 		taskqgroup_attach_cpu(qgroup_wg_tqg, &sc->sc_encrypt[i], sc, i, NULL, NULL, "wg encrypt");
2760 		GROUPTASK_INIT(&sc->sc_decrypt[i], 0,
2761 		    (gtask_fn_t *)wg_softc_decrypt, sc);
2762 		taskqgroup_attach_cpu(qgroup_wg_tqg, &sc->sc_decrypt[i], sc, i, NULL, NULL, "wg decrypt");
2763 	}
2764 
2765 	wg_queue_init(&sc->sc_encrypt_parallel, "encp");
2766 	wg_queue_init(&sc->sc_decrypt_parallel, "decp");
2767 
2768 	sx_init(&sc->sc_lock, "wg softc lock");
2769 
2770 	ifp->if_softc = sc;
2771 	ifp->if_capabilities = ifp->if_capenable = WG_CAPS;
2772 	if_initname(ifp, wgname, unit);
2773 
2774 	if_setmtu(ifp, DEFAULT_MTU);
2775 	ifp->if_flags = IFF_NOARP | IFF_MULTICAST;
2776 	ifp->if_init = wg_init;
2777 	ifp->if_reassign = wg_reassign;
2778 	ifp->if_qflush = wg_qflush;
2779 	ifp->if_transmit = wg_transmit;
2780 	ifp->if_output = wg_output;
2781 	ifp->if_ioctl = wg_ioctl;
2782 	if_attach(ifp);
2783 	bpfattach(ifp, DLT_NULL, sizeof(uint32_t));
2784 #ifdef INET6
2785 	ND_IFINFO(ifp)->flags &= ~ND6_IFF_AUTO_LINKLOCAL;
2786 	ND_IFINFO(ifp)->flags |= ND6_IFF_NO_DAD;
2787 #endif
2788 	sx_xlock(&wg_sx);
2789 	LIST_INSERT_HEAD(&wg_list, sc, sc_entry);
2790 	sx_xunlock(&wg_sx);
2791 	return (0);
2792 free_aip4:
2793 	RADIX_NODE_HEAD_DESTROY(sc->sc_aip4);
2794 	free(sc->sc_aip4, M_RTABLE);
2795 free_decrypt:
2796 	free(sc->sc_decrypt, M_WG);
2797 	free(sc->sc_encrypt, M_WG);
2798 	noise_local_free(sc->sc_local, NULL);
2799 	free(sc, M_WG);
2800 	return (ENOMEM);
2801 }
2802 
2803 static void
2804 wg_clone_deferred_free(struct noise_local *l)
2805 {
2806 	struct wg_softc *sc = noise_local_arg(l);
2807 
2808 	free(sc, M_WG);
2809 	atomic_add_int(&clone_count, -1);
2810 }
2811 
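/*
 * Clone destruction.  The interface is marked WGF_DYING and its ucred and
 * socket are released first; NET_EPOCH_WAIT() and taskqgroup_drain_all()
 * then ensure that no input path or crypto task still references the softc
 * before the peers, queues, radix heads and cookie checker are torn down.
 * The softc itself is freed in wg_clone_deferred_free() once the last
 * noise_local reference goes away.
 */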
2812 static void
2813 wg_clone_destroy(struct ifnet *ifp)
2814 {
2815 	struct wg_softc *sc = ifp->if_softc;
2816 	struct ucred *cred;
2817 
2818 	sx_xlock(&wg_sx);
2819 	ifp->if_softc = NULL;
2820 	sx_xlock(&sc->sc_lock);
2821 	sc->sc_flags |= WGF_DYING;
2822 	cred = sc->sc_ucred;
2823 	sc->sc_ucred = NULL;
2824 	sx_xunlock(&sc->sc_lock);
2825 	LIST_REMOVE(sc, sc_entry);
2826 	sx_xunlock(&wg_sx);
2827 
2828 	if_link_state_change(sc->sc_ifp, LINK_STATE_DOWN);
2829 	CURVNET_SET(sc->sc_ifp->if_vnet);
2830 	if_purgeaddrs(sc->sc_ifp);
2831 	CURVNET_RESTORE();
2832 
2833 	sx_xlock(&sc->sc_lock);
2834 	wg_socket_uninit(sc);
2835 	sx_xunlock(&sc->sc_lock);
2836 
2837 	/*
2838 	 * There is no guarantee that all traffic has passed until the epoch
2839 	 * has elapsed with the socket closed.
2840 	 */
2841 	NET_EPOCH_WAIT();
2842 
2843 	taskqgroup_drain_all(qgroup_wg_tqg);
2844 	sx_xlock(&sc->sc_lock);
2845 	wg_peer_destroy_all(sc);
2846 	epoch_drain_callbacks(net_epoch_preempt);
2847 	sx_xunlock(&sc->sc_lock);
2848 	sx_destroy(&sc->sc_lock);
2849 	taskqgroup_detach(qgroup_wg_tqg, &sc->sc_handshake);
2850 	for (int i = 0; i < mp_ncpus; i++) {
2851 		taskqgroup_detach(qgroup_wg_tqg, &sc->sc_encrypt[i]);
2852 		taskqgroup_detach(qgroup_wg_tqg, &sc->sc_decrypt[i]);
2853 	}
2854 	free(sc->sc_encrypt, M_WG);
2855 	free(sc->sc_decrypt, M_WG);
2856 	wg_queue_deinit(&sc->sc_handshake_queue);
2857 	wg_queue_deinit(&sc->sc_encrypt_parallel);
2858 	wg_queue_deinit(&sc->sc_decrypt_parallel);
2859 
2860 	RADIX_NODE_HEAD_DESTROY(sc->sc_aip4);
2861 	RADIX_NODE_HEAD_DESTROY(sc->sc_aip6);
2862 	rn_detachhead((void **)&sc->sc_aip4);
2863 	rn_detachhead((void **)&sc->sc_aip6);
2864 
2865 	cookie_checker_free(&sc->sc_cookie);
2866 
2867 	if (cred != NULL)
2868 		crfree(cred);
2869 	if_detach(sc->sc_ifp);
2870 	if_free(sc->sc_ifp);
2871 
2872 	noise_local_free(sc->sc_local, wg_clone_deferred_free);
2873 }
2874 
2875 static void
2876 wg_qflush(struct ifnet *ifp __unused)
2877 {
2878 }
2879 
2880 /*
2881  * Privileged information (private-key, preshared-key) is only exported for
2882  * root and jailed root by default.
2883  */
2884 static bool
2885 wgc_privileged(struct wg_softc *sc)
2886 {
2887 	struct thread *td;
2888 
2889 	td = curthread;
2890 	return (priv_check(td, PRIV_NET_WG) == 0);
2891 }
2892 
2893 static void
2894 wg_reassign(struct ifnet *ifp, struct vnet *new_vnet __unused,
2895     char *unused __unused)
2896 {
2897 	struct wg_softc *sc;
2898 
2899 	sc = ifp->if_softc;
2900 	wg_down(sc);
2901 }
2902 
2903 static void
2904 wg_init(void *xsc)
2905 {
2906 	struct wg_softc *sc;
2907 
2908 	sc = xsc;
2909 	wg_up(sc);
2910 }
2911 
2912 static void
2913 vnet_wg_init(const void *unused __unused)
2914 {
2915 	V_wg_cloner = if_clone_simple(wgname, wg_clone_create, wg_clone_destroy,
2916 				      0);
2917 }
2918 VNET_SYSINIT(vnet_wg_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
2919 	     vnet_wg_init, NULL);
2920 
2921 static void
2922 vnet_wg_uninit(const void *unused __unused)
2923 {
2924 	if (V_wg_cloner)
2925 		if_clone_detach(V_wg_cloner);
2926 }
2927 VNET_SYSUNINIT(vnet_wg_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
2928 	       vnet_wg_uninit, NULL);
2929 
2930 static int
2931 wg_prison_remove(void *obj, void *data __unused)
2932 {
2933 	const struct prison *pr = obj;
2934 	struct wg_softc *sc;
2935 
2936 	/*
2937 	 * Do a pass through all if_wg interfaces and release creds on any from
2938 	 * the jail that are supposed to be going away.  This will, in turn, let
2939 	 * the jail die so that we don't end up with Schrödinger's jail.
2940 	 */
2941 	sx_slock(&wg_sx);
2942 	LIST_FOREACH(sc, &wg_list, sc_entry) {
2943 		sx_xlock(&sc->sc_lock);
2944 		if (!(sc->sc_flags & WGF_DYING) && sc->sc_ucred && sc->sc_ucred->cr_prison == pr) {
2945 			struct ucred *cred = sc->sc_ucred;
2946 			DPRINTF(sc, "Creating jail is exiting; tearing down\n");
2947 			if_link_state_change(sc->sc_ifp, LINK_STATE_DOWN);
2948 			wg_socket_uninit(sc);
2949 			sc->sc_ucred = NULL;
2950 			crfree(cred);
2951 			sc->sc_flags |= WGF_DYING;
2952 		}
2953 		sx_xunlock(&sc->sc_lock);
2954 	}
2955 	sx_sunlock(&wg_sx);
2956 
2957 	return (0);
2958 }
2959 
2960 #ifdef SELFTESTS
2961 #include "selftest/allowedips.c"
2962 static bool wg_run_selftests(void)
2963 {
2964 	bool ret = true;
2965 	ret &= wg_allowedips_selftest();
2966 	ret &= noise_counter_selftest();
2967 	ret &= cookie_selftest();
2968 	return ret;
2969 }
2970 #else
2971 static inline bool wg_run_selftests(void) { return true; }
2972 #endif
2973 
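/*
 * Module load: create the wg_packet UMA zone, initialize the crypto and
 * cookie subsystems, register the jail OSD method used by
 * wg_prison_remove() and run the optional self-tests; failures unwind the
 * already-initialized pieces in reverse order.
 */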
2974 static int
2975 wg_module_init(void)
2976 {
2977 	int ret = ENOMEM;
2978 
2979 	osd_method_t methods[PR_MAXMETHOD] = {
2980 		[PR_METHOD_REMOVE] = wg_prison_remove,
2981 	};
2982 
2983 	if ((wg_packet_zone = uma_zcreate("wg packet", sizeof(struct wg_packet),
2984 	     NULL, NULL, NULL, NULL, 0, 0)) == NULL)
2985 		goto free_none;
2986 	ret = crypto_init();
2987 	if (ret != 0)
2988 		goto free_zone;
2989 	if (cookie_init() != 0)
2990 		goto free_crypto;
2991 
2992 	wg_osd_jail_slot = osd_jail_register(NULL, methods);
2993 
2994 	ret = ENOTRECOVERABLE;
2995 	if (!wg_run_selftests())
2996 		goto free_all;
2997 
2998 	return (0);
2999 
3000 free_all:
3001 	osd_jail_deregister(wg_osd_jail_slot);
3002 	cookie_deinit();
3003 free_crypto:
3004 	crypto_deinit();
3005 free_zone:
3006 	uma_zdestroy(wg_packet_zone);
3007 free_none:
3008 	return (ret);
3009 }
3010 
3011 static void
3012 wg_module_deinit(void)
3013 {
3014 	VNET_ITERATOR_DECL(vnet_iter);
3015 	VNET_LIST_RLOCK();
3016 	VNET_FOREACH(vnet_iter) {
3017 		struct if_clone *clone = VNET_VNET(vnet_iter, wg_cloner);
3018 		if (clone) {
3019 			if_clone_detach(clone);
3020 			VNET_VNET(vnet_iter, wg_cloner) = NULL;
3021 		}
3022 	}
3023 	VNET_LIST_RUNLOCK();
3024 	NET_EPOCH_WAIT();
3025 	MPASS(LIST_EMPTY(&wg_list));
3026 	osd_jail_deregister(wg_osd_jail_slot);
3027 	cookie_deinit();
3028 	crypto_deinit();
3029 	uma_zdestroy(wg_packet_zone);
3030 }
3031 
3032 static int
3033 wg_module_event_handler(module_t mod, int what, void *arg)
3034 {
3035 	switch (what) {
3036 		case MOD_LOAD:
3037 			return wg_module_init();
3038 		case MOD_UNLOAD:
3039 			wg_module_deinit();
3040 			break;
3041 		default:
3042 			return (EOPNOTSUPP);
3043 	}
3044 	return (0);
3045 }
3046 
3047 static moduledata_t wg_moduledata = {
3048 	wgname,
3049 	wg_module_event_handler,
3050 	NULL
3051 };
3052 
3053 DECLARE_MODULE(wg, wg_moduledata, SI_SUB_PSEUDO, SI_ORDER_ANY);
3054 MODULE_VERSION(wg, WIREGUARD_VERSION);
3055 MODULE_DEPEND(wg, crypto, 1, 1, 1);
3056