xref: /freebsd/sys/netipsec/ipsec_offload.c (revision ae8d58814089308028046ac80aeeb9cbb784bd0a)
1 /*-
2  * Copyright (c) 2021,2022 NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23  * SUCH DAMAGE.
24  */
25 
26 #include "opt_inet.h"
27 #include "opt_inet6.h"
28 #include "opt_ipsec.h"
29 
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/ck.h>
33 #include <sys/kernel.h>
34 #include <sys/mbuf.h>
35 #include <sys/pctrie.h>
36 #include <sys/proc.h>
37 #include <sys/socket.h>
38 #include <sys/sysctl.h>
39 #include <sys/protosw.h>
40 #include <sys/taskqueue.h>
41 
42 #include <machine/stdarg.h>
43 
44 #include <net/if.h>
45 #include <net/if_var.h>
46 #include <net/vnet.h>
47 #include <netinet/in.h>
48 #include <netinet/ip.h>
49 #include <netinet/ip_var.h>
50 #include <netinet/ip6.h>
51 #include <netinet6/ip6_var.h>
52 #include <netinet/in_pcb.h>
53 #include <netinet/tcp_var.h>
54 
55 #include <netipsec/key.h>
56 #include <netipsec/keydb.h>
57 #include <netipsec/key_debug.h>
58 #include <netipsec/xform.h>
59 #include <netipsec/ipsec.h>
60 #include <netipsec/ipsec_offload.h>
61 #include <netipsec/ah_var.h>
62 #include <netipsec/esp.h>
63 #include <netipsec/esp_var.h>
64 #include <netipsec/ipcomp_var.h>
65 
66 #ifdef IPSEC_OFFLOAD
67 
/*
 * Mutex held around updates of the SA and SP handle lists and of the
 * drv_spi trie; it is also handed to new_unrhdr() for the allocator below.
 */
static struct mtx ipsec_accel_sav_tmp;
/* Unit-number allocator from which driver SPI values are taken. */
static struct unrhdr *drv_spi_unr;
/* Mutex held while driver-reported counters are folded into an SA. */
static struct mtx ipsec_accel_cnt_lock;
71 
/*
 * Deferred request to install an SA into offload-capable interfaces;
 * allocated by ipsec_accel_sa_newkey_impl() and consumed (and freed) by
 * ipsec_accel_sa_newkey_act().
 */
struct ipsec_accel_install_newkey_tq {
	struct secasvar *sav;		/* SA to install, referenced */
	struct vnet *install_vnet;	/* vnet of the requesting thread */
	struct task install_task;	/* taskqueue linkage */
};
77 
/*
 * Deferred request to remove all offload handles of an SA.  The memory is
 * preallocated by ipsec_accel_alloc_forget_tq() and parked in
 * sav->accel_forget_tq until ipsec_accel_forget_sav_impl() claims it.
 */
struct ipsec_accel_forget_tq {
	struct vnet *forget_vnet;	/* vnet of the requesting thread */
	struct task forget_task;	/* taskqueue linkage */
	struct secasvar *sav;		/* SA to forget, referenced */
};
83 
/*
 * Per-(SA, interface) offload record.  One is created for every interface
 * an SA was offered to, whether the driver accepted it or not.
 */
struct ifp_handle_sav {
	CK_LIST_ENTRY(ifp_handle_sav) sav_link;	/* on sav->accel_ifps */
	CK_LIST_ENTRY(ifp_handle_sav) sav_allh_link; /* on the global list */
	struct secasvar *sav;		/* SA this record belongs to */
	struct ifnet *ifp;		/* interface, referenced via if_ref() */
	void *ifdata;			/* driver cookie from if_sa_newkey */
	uint64_t drv_spi;		/* key in drv_spi_pctrie */
	uint32_t flags;			/* IFP_HS_* */
	size_t hdr_ext_size;		/* esp_hdrsiz() when IFP_HS_OUTPUT */
	uint64_t cnt_octets;		/* last octet count from the driver */
	uint64_t cnt_allocs;		/* last packet count from the driver */
};

/* Exactly one of HANDLED/REJECTED is set on a real (non-marker) record. */
#define	IFP_HS_HANDLED	0x00000001
#define	IFP_HS_REJECTED	0x00000002
#define	IFP_HS_INPUT	0x00000004
#define	IFP_HS_OUTPUT	0x00000008
/* Placeholder entry used as an iteration cursor by the ifdown walk. */
#define	IFP_HS_MARKER	0x00000010

/* Every SA handle in the system, linked through sav_allh_link. */
static CK_LIST_HEAD(, ifp_handle_sav) ipsec_accel_all_sav_handles;
104 
/* Per-(security policy, interface) offload record. */
struct ifp_handle_sp {
	CK_LIST_ENTRY(ifp_handle_sp) sp_link;	/* on sp->accel_ifps */
	CK_LIST_ENTRY(ifp_handle_sp) sp_allh_link; /* on the global list */
	struct secpolicy *sp;		/* policy this record belongs to */
	struct ifnet *ifp;		/* interface, referenced via if_ref() */
	void *ifdata;			/* driver cookie filled by if_spdadd */
	uint32_t flags;			/* IFP_HP_* */
};

/* REJECTED is or-ed on top of HANDLED when the driver's if_spdadd fails. */
#define	IFP_HP_HANDLED	0x00000001
#define	IFP_HP_REJECTED	0x00000002
/* Placeholder entry used as an iteration cursor by the ifdown walk. */
#define	IFP_HP_MARKER	0x00000004

/* Every SP handle in the system, linked through sp_allh_link. */
static CK_LIST_HEAD(, ifp_handle_sp) ipsec_accel_all_sp_handles;
119 
120 static void *
121 drvspi_sa_trie_alloc(struct pctrie *ptree)
122 {
123 	void *res;
124 
125 	res = malloc(pctrie_node_size(), M_IPSEC_MISC, M_ZERO | M_NOWAIT);
126 	if (res != NULL)
127 		pctrie_zone_init(res, 0, 0);
128 	return (res);
129 }
130 
/* Node deallocator for the drv_spi trie; pairs with drvspi_sa_trie_alloc(). */
static void
drvspi_sa_trie_free(struct pctrie *ptree, void *node)
{
	free(node, M_IPSEC_MISC);
}
136 
/*
 * Generate the DRVSPI_SA_PCTRIE_* accessors for a trie of
 * struct ifp_handle_sav keyed by the drv_spi member.
 */
PCTRIE_DEFINE(DRVSPI_SA, ifp_handle_sav, drv_spi,
    drvspi_sa_trie_alloc, drvspi_sa_trie_free);
/* Root of the drv_spi -> SA handle trie. */
static struct pctrie drv_spi_pctrie;
140 
/* Forward declarations of file-local helpers used before their definition. */
static void ipsec_accel_sa_newkey_impl(struct secasvar *sav);
static int ipsec_accel_handle_sav(struct secasvar *sav, struct ifnet *ifp,
    u_int drv_spi, void *priv, uint32_t flags, struct ifp_handle_sav **ires);
static void ipsec_accel_forget_sav_clear(struct secasvar *sav);
static struct ifp_handle_sav *ipsec_accel_is_accel_sav_ptr(struct secasvar *sav,
    struct ifnet *ifp);
static int ipsec_accel_sa_lifetime_op_impl(struct secasvar *sav,
    struct seclifetime *lft_c, if_t ifp, enum IF_SA_CNT_WHICH op,
    struct rm_priotracker *sahtree_trackerp);
static void ipsec_accel_sa_recordxfer(struct secasvar *sav, struct mbuf *m);
static void ipsec_accel_sync_imp(void);
static bool ipsec_accel_is_accel_sav_impl(struct secasvar *sav);
static struct mbuf *ipsec_accel_key_setaccelif_impl(struct secasvar *sav);
154 
155 static void
156 ipsec_accel_init(void *arg)
157 {
158 	mtx_init(&ipsec_accel_sav_tmp, "ipasat", MTX_DEF, 0);
159 	mtx_init(&ipsec_accel_cnt_lock, "ipascn", MTX_DEF, 0);
160 	drv_spi_unr = new_unrhdr(IPSEC_ACCEL_DRV_SPI_MIN,
161 	    IPSEC_ACCEL_DRV_SPI_MAX, &ipsec_accel_sav_tmp);
162 	ipsec_accel_sa_newkey_p = ipsec_accel_sa_newkey_impl;
163 	ipsec_accel_forget_sav_p = ipsec_accel_forget_sav_impl;
164 	ipsec_accel_spdadd_p = ipsec_accel_spdadd_impl;
165 	ipsec_accel_spddel_p = ipsec_accel_spddel_impl;
166 	ipsec_accel_sa_lifetime_op_p = ipsec_accel_sa_lifetime_op_impl;
167 	ipsec_accel_sync_p = ipsec_accel_sync_imp;
168 	ipsec_accel_is_accel_sav_p = ipsec_accel_is_accel_sav_impl;
169 	ipsec_accel_key_setaccelif_p = ipsec_accel_key_setaccelif_impl;
170 	pctrie_init(&drv_spi_pctrie);
171 }
172 SYSINIT(ipsec_accel_init, SI_SUB_VNET_DONE, SI_ORDER_ANY,
173     ipsec_accel_init, NULL);
174 
/*
 * Tear down the offload layer: unpublish the hooks, drain the task queue
 * through ipsec_accel_sync_imp(), then release the drv_spi allocator and
 * destroy the mutexes.
 */
static void
ipsec_accel_fini(void *arg)
{
	/* Stop new work from entering through the hook pointers. */
	ipsec_accel_sa_newkey_p = NULL;
	ipsec_accel_forget_sav_p = NULL;
	ipsec_accel_spdadd_p = NULL;
	ipsec_accel_spddel_p = NULL;
	ipsec_accel_sa_lifetime_op_p = NULL;
	ipsec_accel_sync_p = NULL;
	ipsec_accel_is_accel_sav_p = NULL;
	ipsec_accel_key_setaccelif_p = NULL;
	/* Wait for already queued install/forget/spd tasks to finish. */
	ipsec_accel_sync_imp();
	clean_unrhdr(drv_spi_unr);	/* avoid panic, should go later */
	clear_unrhdr(drv_spi_unr);
	delete_unrhdr(drv_spi_unr);
	mtx_destroy(&ipsec_accel_sav_tmp);
	mtx_destroy(&ipsec_accel_cnt_lock);
}
SYSUNINIT(ipsec_accel_fini, SI_SUB_VNET_DONE, SI_ORDER_ANY,
    ipsec_accel_fini, NULL);
195 
/* net.inet.ipsec.offload sysctl subtree. */
SYSCTL_NODE(_net_inet_ipsec, OID_AUTO, offload, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "");

/* When true, dprintf() below emits its messages; otherwise it is silent. */
static bool ipsec_offload_verbose = false;
SYSCTL_BOOL(_net_inet_ipsec_offload, OID_AUTO, verbose, CTLFLAG_RW,
    &ipsec_offload_verbose, 0,
    "Verbose SA/SP offload install and deinstall");
203 
204 static void
205 dprintf(const char *fmt, ...)
206 {
207 	va_list ap;
208 
209 	if (!ipsec_offload_verbose)
210 		return;
211 
212 	va_start(ap, fmt);
213 	vprintf(fmt, ap);
214 	va_end(ap);
215 }
216 
217 static void
218 ipsec_accel_alloc_forget_tq(struct secasvar *sav)
219 {
220 	void *ftq;
221 
222 	if (sav->accel_forget_tq != 0)
223 		return;
224 
225 	ftq = malloc(sizeof(struct ipsec_accel_forget_tq), M_TEMP, M_WAITOK);
226 	if (!atomic_cmpset_ptr(&sav->accel_forget_tq, 0, (uintptr_t)ftq))
227 		free(ftq, M_TEMP);
228 }
229 
230 static bool
231 ipsec_accel_sa_install_match(if_t ifp, void *arg)
232 {
233 	if ((ifp->if_capenable2 & IFCAP2_BIT(IFCAP2_IPSEC_OFFLOAD)) == 0)
234 		return (false);
235 	if (ifp->if_ipsec_accel_m->if_sa_newkey == NULL) {
236 		dprintf("driver bug ifp %s if_sa_newkey NULL\n",
237 		    if_name(ifp));
238 		return (false);
239 	}
240 	return (true);
241 }
242 
243 static int
244 ipsec_accel_sa_newkey_cb(if_t ifp, void *arg)
245 {
246 	struct ipsec_accel_install_newkey_tq *tq;
247 	void *priv;
248 	u_int drv_spi;
249 	int error;
250 
251 	tq = arg;
252 
253 	dprintf("ipsec_accel_sa_newkey_act: ifp %s h %p spi %#x "
254 	    "flags %#x seq %d\n",
255 	    if_name(ifp), ifp->if_ipsec_accel_m->if_sa_newkey,
256 	    be32toh(tq->sav->spi), tq->sav->flags, tq->sav->seq);
257 	priv = NULL;
258 	drv_spi = alloc_unr(drv_spi_unr);
259 	if (tq->sav->accel_ifname != NULL &&
260 	    strcmp(tq->sav->accel_ifname, if_name(ifp)) != 0) {
261 		error = ipsec_accel_handle_sav(tq->sav,
262 		    ifp, drv_spi, priv, IFP_HS_REJECTED, NULL);
263 		goto out;
264 	}
265 	if (drv_spi == -1) {
266 		/* XXXKIB */
267 		dprintf("ipsec_accel_sa_install_newkey: cannot alloc "
268 		    "drv_spi if %s spi %#x\n", if_name(ifp),
269 		    be32toh(tq->sav->spi));
270 		return (ENOMEM);
271 	}
272 	error = ifp->if_ipsec_accel_m->if_sa_newkey(ifp, tq->sav,
273 	    drv_spi, &priv);
274 	if (error != 0) {
275 		if (error == EOPNOTSUPP) {
276 			dprintf("ipsec_accel_sa_newkey: driver "
277 			    "refused sa if %s spi %#x\n",
278 			    if_name(ifp), be32toh(tq->sav->spi));
279 			error = ipsec_accel_handle_sav(tq->sav,
280 			    ifp, drv_spi, priv, IFP_HS_REJECTED, NULL);
281 			/* XXXKIB */
282 		} else {
283 			dprintf("ipsec_accel_sa_newkey: driver "
284 			    "error %d if %s spi %#x\n",
285 			    error, if_name(ifp), be32toh(tq->sav->spi));
286 			/* XXXKIB */
287 		}
288 	} else {
289 		error = ipsec_accel_handle_sav(tq->sav, ifp,
290 		    drv_spi, priv, IFP_HS_HANDLED, NULL);
291 		if (error != 0) {
292 			/* XXXKIB */
293 			dprintf("ipsec_accel_sa_newkey: handle_sav "
294 			    "err %d if %s spi %#x\n", error,
295 			    if_name(ifp), be32toh(tq->sav->spi));
296 		}
297 	}
298 out:
299 	return (error);
300 }
301 
/*
 * Task handler for a deferred SA install request.
 *
 * If the SA is mature and neither installation nor deinstallation has
 * been started, mark it as being installed and offer it to every matching
 * interface.  The reference taken by ipsec_accel_sa_newkey_impl() and the
 * request itself are released before returning.
 */
static void
ipsec_accel_sa_newkey_act(void *context, int pending)
{
	struct ipsec_accel_install_newkey_tq *tq;
	void *tqf;
	struct secasvar *sav;

	tq = context;
	tqf = NULL;
	sav = tq->sav;
	CURVNET_SET(tq->install_vnet);
	mtx_lock(&ipsec_accel_sav_tmp);
	if ((sav->accel_flags & (SADB_KEY_ACCEL_INST |
	    SADB_KEY_ACCEL_DEINST)) == 0 &&
	    sav->state == SADB_SASTATE_MATURE) {
		sav->accel_flags |= SADB_KEY_ACCEL_INST;
		/* The interface walk and the allocation below may sleep. */
		mtx_unlock(&ipsec_accel_sav_tmp);
		if_foreach_sleep(ipsec_accel_sa_install_match, context,
		    ipsec_accel_sa_newkey_cb, context);
		ipsec_accel_alloc_forget_tq(sav);
		mtx_lock(&ipsec_accel_sav_tmp);

		/*
		 * If ipsec_accel_forget_sav() raced with us and set
		 * the flag, do its work.  Its task cannot execute in
		 * parallel since taskqueue_thread is single-threaded.
		 */
		if ((sav->accel_flags & SADB_KEY_ACCEL_DEINST) != 0) {
			tqf = (void *)sav->accel_forget_tq;
			sav->accel_forget_tq = 0;
			ipsec_accel_forget_sav_clear(sav);
		}
	}
	mtx_unlock(&ipsec_accel_sav_tmp);
	key_freesav(&tq->sav);
	CURVNET_RESTORE();
	free(tq, M_TEMP);
	/* tqf is NULL unless the forget request was taken over above. */
	free(tqf, M_TEMP);
}
341 
342 static void
343 ipsec_accel_sa_newkey_impl(struct secasvar *sav)
344 {
345 	struct ipsec_accel_install_newkey_tq *tq;
346 
347 	if ((sav->accel_flags & (SADB_KEY_ACCEL_INST |
348 	    SADB_KEY_ACCEL_DEINST)) != 0)
349 		return;
350 
351 	dprintf(
352 	    "ipsec_accel_sa_install_newkey: spi %#x flags %#x seq %d\n",
353 	    be32toh(sav->spi), sav->flags, sav->seq);
354 
355 	tq = malloc(sizeof(*tq), M_TEMP, M_NOWAIT);
356 	if (tq == NULL) {
357 		dprintf("ipsec_accel_sa_install_newkey: no memory for tq, "
358 		    "spi %#x\n", be32toh(sav->spi));
359 		/* XXXKIB */
360 		return;
361 	}
362 
363 	refcount_acquire(&sav->refcnt);
364 
365 	TASK_INIT(&tq->install_task, 0, ipsec_accel_sa_newkey_act, tq);
366 	tq->sav = sav;
367 	tq->install_vnet = curthread->td_vnet;	/* XXXKIB liveness */
368 	taskqueue_enqueue(taskqueue_thread, &tq->install_task);
369 }
370 
371 static int
372 ipsec_accel_handle_sav(struct secasvar *sav, struct ifnet *ifp,
373     u_int drv_spi, void *priv, uint32_t flags, struct ifp_handle_sav **ires)
374 {
375 	struct ifp_handle_sav *ihs, *i;
376 	int error;
377 
378 	MPASS(__bitcount(flags & (IFP_HS_HANDLED | IFP_HS_REJECTED)) == 1);
379 
380 	ihs = malloc(sizeof(*ihs), M_IPSEC_MISC, M_WAITOK | M_ZERO);
381 	ihs->ifp = ifp;
382 	ihs->sav = sav;
383 	ihs->drv_spi = drv_spi;
384 	ihs->ifdata = priv;
385 	ihs->flags = flags;
386 	if ((flags & IFP_HS_OUTPUT) != 0)
387 		ihs->hdr_ext_size = esp_hdrsiz(sav);
388 	mtx_lock(&ipsec_accel_sav_tmp);
389 	CK_LIST_FOREACH(i, &sav->accel_ifps, sav_link) {
390 		if (i->ifp == ifp) {
391 			error = EALREADY;
392 			goto errout;
393 		}
394 	}
395 	error = DRVSPI_SA_PCTRIE_INSERT(&drv_spi_pctrie, ihs);
396 	if (error != 0)
397 		goto errout;
398 	if_ref(ihs->ifp);
399 	CK_LIST_INSERT_HEAD(&sav->accel_ifps, ihs, sav_link);
400 	CK_LIST_INSERT_HEAD(&ipsec_accel_all_sav_handles, ihs, sav_allh_link);
401 	mtx_unlock(&ipsec_accel_sav_tmp);
402 	if (ires != NULL)
403 		*ires = ihs;
404 	return (0);
405 errout:
406 	mtx_unlock(&ipsec_accel_sav_tmp);
407 	free(ihs, M_IPSEC_MISC);
408 	if (ires != NULL)
409 		*ires = NULL;
410 	return (error);
411 }
412 
/*
 * Unlink and destroy one SA handle.
 *
 * Called with ipsec_accel_sav_tmp held; the mutex is dropped while the
 * function waits for the net epoch and calls into the driver, and is
 * re-acquired before returning.  When freesav is true, one SA reference
 * is released as well.
 */
static void
ipsec_accel_forget_handle_sav(struct ifp_handle_sav *i, bool freesav)
{
	struct ifnet *ifp;
	struct secasvar *sav;

	mtx_assert(&ipsec_accel_sav_tmp, MA_OWNED);

	CK_LIST_REMOVE(i, sav_link);
	CK_LIST_REMOVE(i, sav_allh_link);
	DRVSPI_SA_PCTRIE_REMOVE(&drv_spi_pctrie, i->drv_spi);
	mtx_unlock(&ipsec_accel_sav_tmp);
	/* Let readers that may still see the handle leave the epoch. */
	NET_EPOCH_WAIT();
	ifp = i->ifp;
	sav = i->sav;
	/* Only SAs the driver actually accepted need a deinstall call. */
	if ((i->flags & (IFP_HS_HANDLED | IFP_HS_REJECTED)) ==
	    IFP_HS_HANDLED) {
		dprintf("sa deinstall %s %p spi %#x ifl %#x\n",
		    if_name(ifp), sav, be32toh(sav->spi), i->flags);
		ifp->if_ipsec_accel_m->if_sa_deinstall(ifp,
		    i->drv_spi, i->ifdata);
	}
	if_rele(ifp);
	free_unr(drv_spi_unr, i->drv_spi);
	free(i, M_IPSEC_MISC);
	if (freesav)
		key_freesav(&sav);
	mtx_lock(&ipsec_accel_sav_tmp);
}
442 
443 static void
444 ipsec_accel_forget_sav_clear(struct secasvar *sav)
445 {
446 	struct ifp_handle_sav *i;
447 
448 	for (;;) {
449 		i = CK_LIST_FIRST(&sav->accel_ifps);
450 		if (i == NULL)
451 			break;
452 		ipsec_accel_forget_handle_sav(i, false);
453 	}
454 }
455 
456 static void
457 ipsec_accel_forget_sav_act(void *arg, int pending)
458 {
459 	struct ipsec_accel_forget_tq *tq;
460 	struct secasvar *sav;
461 
462 	tq = arg;
463 	sav = tq->sav;
464 	CURVNET_SET(tq->forget_vnet);
465 	mtx_lock(&ipsec_accel_sav_tmp);
466 	ipsec_accel_forget_sav_clear(sav);
467 	mtx_unlock(&ipsec_accel_sav_tmp);
468 	key_freesav(&sav);
469 	CURVNET_RESTORE();
470 	free(tq, M_TEMP);
471 }
472 
/*
 * Hook entry: mark the SA as being deinstalled and, if a preallocated
 * forget request is parked in sav->accel_forget_tq, claim it and schedule
 * the asynchronous removal of the SA's handles.
 */
void
ipsec_accel_forget_sav_impl(struct secasvar *sav)
{
	struct ipsec_accel_forget_tq *tq;

	mtx_lock(&ipsec_accel_sav_tmp);
	sav->accel_flags |= SADB_KEY_ACCEL_DEINST;
	tq = (void *)atomic_load_ptr(&sav->accel_forget_tq);
	/* Nothing parked, or somebody else claimed the request first. */
	if (tq == NULL || !atomic_cmpset_ptr(&sav->accel_forget_tq,
	    (uintptr_t)tq, 0)) {
		mtx_unlock(&ipsec_accel_sav_tmp);
		return;
	}
	mtx_unlock(&ipsec_accel_sav_tmp);

	/* The task owns one SA reference, dropped in the task handler. */
	refcount_acquire(&sav->refcnt);
	TASK_INIT(&tq->forget_task, 0, ipsec_accel_forget_sav_act, tq);
	tq->forget_vnet = curthread->td_vnet;
	tq->sav = sav;
	taskqueue_enqueue(taskqueue_thread, &tq->forget_task);
}
494 
495 static void
496 ipsec_accel_on_ifdown_sav(struct ifnet *ifp)
497 {
498 	struct ifp_handle_sav *i, *marker;
499 
500 	marker = malloc(sizeof(*marker), M_IPSEC_MISC, M_WAITOK | M_ZERO);
501 	marker->flags = IFP_HS_MARKER;
502 
503 	mtx_lock(&ipsec_accel_sav_tmp);
504 	CK_LIST_INSERT_HEAD(&ipsec_accel_all_sav_handles, marker,
505 	    sav_allh_link);
506 	for (;;) {
507 		i = CK_LIST_NEXT(marker, sav_allh_link);
508 		if (i == NULL)
509 			break;
510 		CK_LIST_REMOVE(marker, sav_allh_link);
511 		CK_LIST_INSERT_AFTER(i, marker, sav_allh_link);
512 		if (i->ifp == ifp) {
513 			refcount_acquire(&i->sav->refcnt); /* XXXKIB wrap ? */
514 			ipsec_accel_forget_handle_sav(i, true);
515 		}
516 	}
517 	CK_LIST_REMOVE(marker, sav_allh_link);
518 	mtx_unlock(&ipsec_accel_sav_tmp);
519 	free(marker, M_IPSEC_MISC);
520 }
521 
522 static struct ifp_handle_sav *
523 ipsec_accel_is_accel_sav_ptr_raw(struct secasvar *sav, struct ifnet *ifp)
524 {
525 	struct ifp_handle_sav *i;
526 
527 	if ((ifp->if_capenable2 & IFCAP2_BIT(IFCAP2_IPSEC_OFFLOAD)) == 0)
528 		return (NULL);
529 	CK_LIST_FOREACH(i, &sav->accel_ifps, sav_link) {
530 		if (i->ifp == ifp)
531 			return (i);
532 	}
533 	return (NULL);
534 }
535 
/*
 * Same lookup as ipsec_accel_is_accel_sav_ptr_raw(), with an assertion
 * that the caller is inside the network epoch.
 */
static struct ifp_handle_sav *
ipsec_accel_is_accel_sav_ptr(struct secasvar *sav, struct ifnet *ifp)
{
	NET_EPOCH_ASSERT();
	return (ipsec_accel_is_accel_sav_ptr_raw(sav, ifp));
}
542 
/* Hook entry: report whether the SA has any offload handle attached. */
static bool
ipsec_accel_is_accel_sav_impl(struct secasvar *sav)
{
	return (!CK_LIST_EMPTY(&sav->accel_ifps));
}
548 
549 static struct secasvar *
550 ipsec_accel_drvspi_to_sa(u_int drv_spi)
551 {
552 	struct ifp_handle_sav *i;
553 
554 	i = DRVSPI_SA_PCTRIE_LOOKUP(&drv_spi_pctrie, drv_spi);
555 	if (i == NULL)
556 		return (NULL);
557 	return (i->sav);
558 }
559 
560 static struct ifp_handle_sp *
561 ipsec_accel_find_accel_sp(struct secpolicy *sp, if_t ifp)
562 {
563 	struct ifp_handle_sp *i;
564 
565 	CK_LIST_FOREACH(i, &sp->accel_ifps, sp_link) {
566 		if (i->ifp == ifp)
567 			return (i);
568 	}
569 	return (NULL);
570 }
571 
/* Report whether the policy already has a handle for the interface. */
static bool
ipsec_accel_is_accel_sp(struct secpolicy *sp, if_t ifp)
{
	return (ipsec_accel_find_accel_sp(sp, ifp) != NULL);
}
577 
578 static int
579 ipsec_accel_remember_sp(struct secpolicy *sp, if_t ifp,
580     struct ifp_handle_sp **ip)
581 {
582 	struct ifp_handle_sp *i;
583 
584 	i = malloc(sizeof(*i), M_IPSEC_MISC, M_WAITOK | M_ZERO);
585 	i->sp = sp;
586 	i->ifp = ifp;
587 	if_ref(ifp);
588 	i->flags = IFP_HP_HANDLED;
589 	mtx_lock(&ipsec_accel_sav_tmp);
590 	CK_LIST_INSERT_HEAD(&sp->accel_ifps, i, sp_link);
591 	CK_LIST_INSERT_HEAD(&ipsec_accel_all_sp_handles, i, sp_allh_link);
592 	mtx_unlock(&ipsec_accel_sav_tmp);
593 	*ip = i;
594 	return (0);
595 }
596 
597 static bool
598 ipsec_accel_spdadd_match(if_t ifp, void *arg)
599 {
600 	struct secpolicy *sp;
601 
602 	if ((ifp->if_capenable2 & IFCAP2_BIT(IFCAP2_IPSEC_OFFLOAD)) == 0 ||
603 	    ifp->if_ipsec_accel_m->if_spdadd == NULL)
604 		return (false);
605 	sp = arg;
606 	if (sp->accel_ifname != NULL &&
607 	    strcmp(sp->accel_ifname, if_name(ifp)) != 0)
608 		return (false);
609 	if (ipsec_accel_is_accel_sp(sp, ifp))
610 		return (false);
611 	return (true);
612 }
613 
614 static int
615 ipsec_accel_spdadd_cb(if_t ifp, void *arg)
616 {
617 	struct secpolicy *sp;
618 	struct inpcb *inp;
619 	struct ifp_handle_sp *i;
620 	int error;
621 
622 	sp = arg;
623 	inp = sp->ipsec_accel_add_sp_inp;
624 	dprintf("ipsec_accel_spdadd_cb: ifp %s m %p sp %p inp %p\n",
625 	    if_name(ifp), ifp->if_ipsec_accel_m->if_spdadd, sp, inp);
626 	error = ipsec_accel_remember_sp(sp, ifp, &i);
627 	if (error != 0) {
628 		dprintf("ipsec_accel_spdadd: %s if_spdadd %p remember res %d\n",
629 		    if_name(ifp), sp, error);
630 		return (error);
631 	}
632 	error = ifp->if_ipsec_accel_m->if_spdadd(ifp, sp, inp, &i->ifdata);
633 	if (error != 0) {
634 		i->flags |= IFP_HP_REJECTED;
635 		dprintf("ipsec_accel_spdadd: %s if_spdadd %p res %d\n",
636 		    if_name(ifp), sp, error);
637 	}
638 	return (error);
639 }
640 
/*
 * Task handler for a deferred policy install: offer the policy to every
 * matching interface, then drop the inpcb reference (if any) and the
 * policy reference taken by ipsec_accel_spdadd_impl().
 */
static void
ipsec_accel_spdadd_act(void *arg, int pending)
{
	struct secpolicy *sp;
	struct inpcb *inp;

	sp = arg;
	CURVNET_SET(sp->accel_add_tq.adddel_vnet);
	if_foreach_sleep(ipsec_accel_spdadd_match, arg,
	    ipsec_accel_spdadd_cb, arg);
	inp = sp->ipsec_accel_add_sp_inp;
	if (inp != NULL) {
		INP_WLOCK(inp);
		/* Unlock only if the release did not already dispose of inp. */
		if (!in_pcbrele_wlocked(inp))
			INP_WUNLOCK(inp);
		sp->ipsec_accel_add_sp_inp = NULL;
	}
	CURVNET_RESTORE();
	key_freesp(&sp);
}
661 
/*
 * Hook entry: schedule an asynchronous install of the policy into
 * offload-capable interfaces.  Ignored for a NULL policy, for a policy
 * with no transforms and no inpcb, or when the add task was already
 * scheduled once for this policy.
 */
void
ipsec_accel_spdadd_impl(struct secpolicy *sp, struct inpcb *inp)
{
	struct ipsec_accel_adddel_sp_tq *tq;

	if (sp == NULL)
		return;
	if (sp->tcount == 0 && inp == NULL)
		return;
	tq = &sp->accel_add_tq;
	/* Only the caller that flips 0 -> 1 gets to schedule the task. */
	if (atomic_cmpset_int(&tq->adddel_scheduled, 0, 1) == 0)
		return;
	tq->adddel_vnet = curthread->td_vnet;
	sp->ipsec_accel_add_sp_inp = inp;
	if (inp != NULL)
		in_pcbref(inp);
	TASK_INIT(&tq->adddel_task, 0, ipsec_accel_spdadd_act, sp);
	/* The task owns one policy reference, dropped in the handler. */
	key_addref(sp);
	taskqueue_enqueue(taskqueue_thread, &tq->adddel_task);
}
682 
683 static void
684 ipsec_accel_spddel_act(void *arg, int pending)
685 {
686 	struct ifp_handle_sp *i;
687 	struct secpolicy *sp;
688 	int error;
689 
690 	sp = arg;
691 	CURVNET_SET(sp->accel_del_tq.adddel_vnet);
692 	mtx_lock(&ipsec_accel_sav_tmp);
693 	for (;;) {
694 		i = CK_LIST_FIRST(&sp->accel_ifps);
695 		if (i == NULL)
696 			break;
697 		CK_LIST_REMOVE(i, sp_link);
698 		CK_LIST_REMOVE(i, sp_allh_link);
699 		mtx_unlock(&ipsec_accel_sav_tmp);
700 		NET_EPOCH_WAIT();
701 		if ((i->flags & (IFP_HP_HANDLED | IFP_HP_REJECTED)) ==
702 		    IFP_HP_HANDLED) {
703 			dprintf("spd deinstall %s %p\n", if_name(i->ifp), sp);
704 			error = i->ifp->if_ipsec_accel_m->if_spddel(i->ifp,
705 			    sp, i->ifdata);
706 			if (error != 0) {
707 				dprintf(
708 		    "ipsec_accel_spddel: %s if_spddel %p res %d\n",
709 				    if_name(i->ifp), sp, error);
710 			}
711 		}
712 		if_rele(i->ifp);
713 		free(i, M_IPSEC_MISC);
714 		mtx_lock(&ipsec_accel_sav_tmp);
715 	}
716 	mtx_unlock(&ipsec_accel_sav_tmp);
717 	key_freesp(&sp);
718 	CURVNET_RESTORE();
719 }
720 
/*
 * Hook entry: schedule an asynchronous removal of the policy from the
 * interfaces it was offered to.  Ignored for a NULL policy or when the
 * delete task was already scheduled once for this policy.
 */
void
ipsec_accel_spddel_impl(struct secpolicy *sp)
{
	struct ipsec_accel_adddel_sp_tq *tq;

	if (sp == NULL)
		return;

	tq = &sp->accel_del_tq;
	/* Only the caller that flips 0 -> 1 gets to schedule the task. */
	if (atomic_cmpset_int(&tq->adddel_scheduled, 0, 1) == 0)
		return;
	tq->adddel_vnet = curthread->td_vnet;
	TASK_INIT(&tq->adddel_task, 0, ipsec_accel_spddel_act, sp);
	/* The task owns one policy reference, dropped in the handler. */
	key_addref(sp);
	taskqueue_enqueue(taskqueue_thread, &tq->adddel_task);
}
737 
738 static void
739 ipsec_accel_on_ifdown_sp(struct ifnet *ifp)
740 {
741 	struct ifp_handle_sp *i, *marker;
742 	struct secpolicy *sp;
743 	int error;
744 
745 	marker = malloc(sizeof(*marker), M_IPSEC_MISC, M_WAITOK | M_ZERO);
746 	marker->flags = IFP_HS_MARKER;
747 
748 	mtx_lock(&ipsec_accel_sav_tmp);
749 	CK_LIST_INSERT_HEAD(&ipsec_accel_all_sp_handles, marker,
750 	    sp_allh_link);
751 	for (;;) {
752 		i = CK_LIST_NEXT(marker, sp_allh_link);
753 		if (i == NULL)
754 			break;
755 		CK_LIST_REMOVE(marker, sp_allh_link);
756 		CK_LIST_INSERT_AFTER(i, marker, sp_allh_link);
757 		if (i->ifp != ifp)
758 			continue;
759 
760 		sp = i->sp;
761 		key_addref(sp);
762 		CK_LIST_REMOVE(i, sp_link);
763 		CK_LIST_REMOVE(i, sp_allh_link);
764 		mtx_unlock(&ipsec_accel_sav_tmp);
765 		NET_EPOCH_WAIT();
766 		if ((i->flags & (IFP_HP_HANDLED | IFP_HP_REJECTED)) ==
767 		    IFP_HP_HANDLED) {
768 			dprintf("spd deinstall %s %p\n", if_name(ifp), sp);
769 			error = ifp->if_ipsec_accel_m->if_spddel(ifp,
770 			    sp, i->ifdata);
771 		}
772 		if (error != 0) {
773 			dprintf(
774 		    "ipsec_accel_on_ifdown_sp: %s if_spddel %p res %d\n",
775 			    if_name(ifp), sp, error);
776 		}
777 		key_freesp(&sp);
778 		if_rele(ifp);
779 		free(i, M_IPSEC_MISC);
780 		mtx_lock(&ipsec_accel_sav_tmp);
781 	}
782 	CK_LIST_REMOVE(marker, sp_allh_link);
783 	mtx_unlock(&ipsec_accel_sav_tmp);
784 	free(marker, M_IPSEC_MISC);
785 }
786 
/*
 * Drop all offload state tied to the interface: policy handles first,
 * then SA handles.
 */
void
ipsec_accel_on_ifdown(struct ifnet *ifp)
{
	ipsec_accel_on_ifdown_sp(ifp);
	ipsec_accel_on_ifdown_sav(ifp);
}
793 
794 static bool
795 ipsec_accel_output_pad(struct mbuf *m, struct secasvar *sav, int skip, int mtu)
796 {
797 	int alen, blks, hlen, padding, rlen;
798 
799 	rlen = m->m_pkthdr.len - skip;
800 	hlen = ((sav->flags & SADB_X_EXT_OLD) != 0 ? sizeof(struct esp) :
801 	    sizeof(struct newesp)) + sav->ivlen;
802 	blks = MAX(4, SAV_ISCTR(sav) && VNET(esp_ctr_compatibility) ?
803 	    sav->tdb_encalgxform->native_blocksize :
804 	    sav->tdb_encalgxform->blocksize);
805 	padding = ((blks - ((rlen + 2) % blks)) % blks) + 2;
806 	alen = xform_ah_authsize(sav->tdb_authalgxform);
807 
808 	return (skip + hlen + rlen + padding + alen <= mtu);
809 }
810 
811 static bool
812 ipsec_accel_output_tag(struct mbuf *m, u_int drv_spi)
813 {
814 	struct ipsec_accel_out_tag *tag;
815 
816 	tag = (struct ipsec_accel_out_tag *)m_tag_get(
817 	    PACKET_TAG_IPSEC_ACCEL_OUT, sizeof(*tag), M_NOWAIT);
818 	if (tag == NULL)
819 		return (false);
820 	tag->drv_spi = drv_spi;
821 	m_tag_prepend(m, &tag->tag);
822 	return (true);
823 }
824 
825 bool
826 ipsec_accel_output(struct ifnet *ifp, struct mbuf *m, struct inpcb *inp,
827     struct secpolicy *sp, struct secasvar *sav, int af, int mtu, int *hwassist)
828 {
829 	struct ifp_handle_sav *i;
830 	struct ip *ip;
831 	struct tcpcb *tp;
832 	u_long ip_len, skip;
833 	bool res;
834 
835 	*hwassist = 0;
836 	res = false;
837 	if (ifp == NULL)
838 		return (res);
839 
840 	M_ASSERTPKTHDR(m);
841 	NET_EPOCH_ASSERT();
842 
843 	if (sav == NULL) {
844 		res = ipsec_accel_output_tag(m, IPSEC_ACCEL_DRV_SPI_BYPASS);
845 		goto out;
846 	}
847 
848 	i = ipsec_accel_is_accel_sav_ptr(sav, ifp);
849 	if (i == NULL)
850 		goto out;
851 
852 	if ((m->m_pkthdr.csum_flags & CSUM_TSO) == 0) {
853 		ip_len = m->m_pkthdr.len;
854 		if (ip_len + i->hdr_ext_size > mtu)
855 			goto out;
856 		switch (af) {
857 		case AF_INET:
858 			ip = mtod(m, struct ip *);
859 			skip = ip->ip_hl << 2;
860 			break;
861 		case AF_INET6:
862 			skip = sizeof(struct ip6_hdr);
863 			break;
864 		default:
865 			__unreachable();
866 		}
867 		if (!ipsec_accel_output_pad(m, sav, skip, mtu))
868 			goto out;
869 	}
870 
871 	if (!ipsec_accel_output_tag(m, i->drv_spi))
872 		goto out;
873 
874 	ipsec_accel_sa_recordxfer(sav, m);
875 	key_freesav(&sav);
876 	if (sp != NULL)
877 		key_freesp(&sp);
878 
879 	*hwassist = ifp->if_ipsec_accel_m->if_hwassist(ifp, sav,
880 	    i->drv_spi, i->ifdata);
881 	res = true;
882 out:
883 	if (inp != NULL && inp->inp_pcbinfo == &V_tcbinfo) {
884 		INP_WLOCK_ASSERT(inp);
885 		tp = (struct tcpcb *)inp;
886 		if (res && (*hwassist & (CSUM_TSO | CSUM_IP6_TSO)) != 0) {
887 			tp->t_flags2 |= TF2_IPSEC_TSO;
888 		} else {
889 			tp->t_flags2 &= ~TF2_IPSEC_TSO;
890 		}
891 	}
892 	return (res);
893 }
894 
895 struct ipsec_accel_in_tag *
896 ipsec_accel_input_tag_lookup(const struct mbuf *m)
897 {
898 	struct ipsec_accel_in_tag *tag;
899 	struct m_tag *xtag;
900 
901 	xtag = m_tag_find(__DECONST(struct mbuf *, m),
902 	    PACKET_TAG_IPSEC_ACCEL_IN, NULL);
903 	if (xtag == NULL)
904 		return (NULL);
905 	tag = __containerof(xtag, struct ipsec_accel_in_tag, tag);
906 	return (tag);
907 }
908 
909 int
910 ipsec_accel_input(struct mbuf *m, int offset, int proto)
911 {
912 	struct secasvar *sav;
913 	struct ipsec_accel_in_tag *tag;
914 
915 	tag = ipsec_accel_input_tag_lookup(m);
916 	if (tag == NULL)
917 		return (ENXIO);
918 
919 	if (tag->drv_spi < IPSEC_ACCEL_DRV_SPI_MIN ||
920 	    tag->drv_spi > IPSEC_ACCEL_DRV_SPI_MAX) {
921 		dprintf("if %s mbuf %p drv_spi %d invalid, packet dropped\n",
922 		    (m->m_flags & M_PKTHDR) != 0 ? if_name(m->m_pkthdr.rcvif) :
923 		    "<unknwn>", m, tag->drv_spi);
924 		m_freem(m);
925 		return (EINPROGRESS);
926 	}
927 
928 	sav = ipsec_accel_drvspi_to_sa(tag->drv_spi);
929 	if (sav != NULL)
930 		ipsec_accel_sa_recordxfer(sav, m);
931 	return (0);
932 }
933 
934 static void
935 ipsec_accel_sa_recordxfer(struct secasvar *sav, struct mbuf *m)
936 {
937 	counter_u64_add(sav->accel_lft_sw, 1);
938 	counter_u64_add(sav->accel_lft_sw + 1, m->m_pkthdr.len);
939 	if (sav->accel_firstused == 0)
940 		sav->accel_firstused = time_second;
941 }
942 
943 static void
944 ipsec_accel_sa_lifetime_update(struct seclifetime *lft_c,
945     const struct seclifetime *lft_l)
946 {
947 	lft_c->allocations += lft_l->allocations;
948 	lft_c->bytes += lft_l->bytes;
949 	lft_c->usetime = min(lft_c->usetime, lft_l->usetime);
950 }
951 
952 void
953 ipsec_accel_drv_sa_lifetime_update(struct secasvar *sav, if_t ifp,
954     u_int drv_spi, uint64_t octets, uint64_t allocs)
955 {
956 	struct epoch_tracker et;
957 	struct ifp_handle_sav *i;
958 	uint64_t odiff, adiff;
959 
960 	NET_EPOCH_ENTER(et);
961 	mtx_lock(&ipsec_accel_cnt_lock);
962 
963 	if (allocs != 0) {
964 		if (sav->firstused == 0)
965 			sav->firstused = time_second;
966 		if (sav->accel_firstused == 0)
967 			sav->accel_firstused = time_second;
968 	}
969 
970 	CK_LIST_FOREACH(i, &sav->accel_ifps, sav_link) {
971 		if (i->ifp == ifp && i->drv_spi == drv_spi)
972 			break;
973 	}
974 	if (i == NULL)
975 		goto out;
976 
977 	odiff = octets - i->cnt_octets;
978 	adiff = allocs - i->cnt_allocs;
979 
980 	if (sav->lft_c != NULL) {
981 		counter_u64_add(sav->lft_c_bytes, odiff);
982 		counter_u64_add(sav->lft_c_allocations, adiff);
983 	}
984 
985 	i->cnt_octets = octets;
986 	i->cnt_allocs = allocs;
987 	sav->accel_hw_octets += odiff;
988 	sav->accel_hw_allocs += adiff;
989 
990 out:
991 	mtx_unlock(&ipsec_accel_cnt_lock);
992 	NET_EPOCH_EXIT(et);
993 }
994 
995 static void
996 ipsec_accel_sa_lifetime_hw(struct secasvar *sav, if_t ifp,
997     struct seclifetime *lft)
998 {
999 	struct ifp_handle_sav *i;
1000 	if_sa_cnt_fn_t p;
1001 
1002 	IFNET_RLOCK_ASSERT();
1003 
1004 	i = ipsec_accel_is_accel_sav_ptr(sav, ifp);
1005 	if (i != NULL && (i->flags & (IFP_HS_HANDLED | IFP_HS_REJECTED)) ==
1006 	    IFP_HS_HANDLED) {
1007 		p = ifp->if_ipsec_accel_m->if_sa_cnt;
1008 		if (p != NULL)
1009 			p(ifp, sav, i->drv_spi, i->ifdata, lft);
1010 	}
1011 }
1012 
/*
 * Hook entry: collect lifetime counters of an SA.
 *
 * op selects the source (hardware counters of one interface, the
 * software offload counters, or hardware counters summed over all
 * interfaces).  With IF_SA_CNT_UPD set the result is folded into the
 * existing contents of lft_c, otherwise lft_c is zeroed first.
 *
 * When sahtree_trackerp is not NULL, the SA hash read lock it tracks is
 * dropped around each driver call in the IF_SA_CNT_TOTAL_HW_VAL case.
 *
 * Returns 0, or the error of the last driver call made; on error lft_c
 * is left untouched.
 */
static int
ipsec_accel_sa_lifetime_op_impl(struct secasvar *sav,
    struct seclifetime *lft_c, if_t ifp, enum IF_SA_CNT_WHICH op,
    struct rm_priotracker *sahtree_trackerp)
{
	struct seclifetime lft_l, lft_s;
	struct ifp_handle_sav *i;
	if_t ifp1;
	if_sa_cnt_fn_t p;
	int error;

	error = 0;
	memset(&lft_l, 0, sizeof(lft_l));
	memset(&lft_s, 0, sizeof(lft_s));

	switch (op & ~IF_SA_CNT_UPD) {
	case IF_SA_CNT_IFP_HW_VAL:
		ipsec_accel_sa_lifetime_hw(sav, ifp, &lft_l);
		ipsec_accel_sa_lifetime_update(&lft_l, &lft_s);
		break;

	case IF_SA_CNT_TOTAL_SW_VAL:
		lft_l.allocations = (uint32_t)counter_u64_fetch(
		    sav->accel_lft_sw);
		lft_l.bytes = counter_u64_fetch(sav->accel_lft_sw + 1);
		lft_l.usetime = sav->accel_firstused;
		break;

	case IF_SA_CNT_TOTAL_HW_VAL:
		IFNET_RLOCK_ASSERT();
		CK_LIST_FOREACH(i, &sav->accel_ifps, sav_link) {
			/* Skip interfaces whose driver does not hold the SA. */
			if ((i->flags & (IFP_HS_HANDLED | IFP_HS_REJECTED)) !=
			    IFP_HS_HANDLED)
				continue;
			ifp1 = i->ifp;
			p = ifp1->if_ipsec_accel_m->if_sa_cnt;
			if (p == NULL)
				continue;
			memset(&lft_s, 0, sizeof(lft_s));
			if (sahtree_trackerp != NULL)
				ipsec_sahtree_runlock(sahtree_trackerp);
			error = p(ifp1, sav, i->drv_spi, i->ifdata, &lft_s);
			if (sahtree_trackerp != NULL)
				ipsec_sahtree_rlock(sahtree_trackerp);
			if (error == 0)
				ipsec_accel_sa_lifetime_update(&lft_l, &lft_s);
		}
		break;
	}

	if (error == 0) {
		if ((op & IF_SA_CNT_UPD) == 0)
			memset(lft_c, 0, sizeof(*lft_c));
		ipsec_accel_sa_lifetime_update(lft_c, &lft_l);
	}

	return (error);
}
1071 
/*
 * Hook entry: wait until every task queued on taskqueue_thread, which
 * includes the install/forget/spd tasks of this file, has run.
 */
static void
ipsec_accel_sync_imp(void)
{
	taskqueue_drain_all(taskqueue_thread);
}
1077 
1078 static struct mbuf *
1079 ipsec_accel_key_setaccelif_impl(struct secasvar *sav)
1080 {
1081 	struct mbuf *m, *m1;
1082 	struct ifp_handle_sav *i;
1083 	struct epoch_tracker et;
1084 
1085 	if (sav->accel_ifname != NULL)
1086 		return (key_setaccelif(sav->accel_ifname));
1087 
1088 	m = m1 = NULL;
1089 
1090 	NET_EPOCH_ENTER(et);
1091 	CK_LIST_FOREACH(i, &sav->accel_ifps, sav_link) {
1092 		if ((i->flags & (IFP_HS_HANDLED | IFP_HS_REJECTED)) ==
1093 		    IFP_HS_HANDLED) {
1094 			m1 = key_setaccelif(if_name(i->ifp));
1095 			if (m == NULL)
1096 				m = m1;
1097 			else if (m1 != NULL)
1098 				m_cat(m, m1);
1099 		}
1100 	}
1101 	NET_EPOCH_EXIT(et);
1102 	return (m);
1103 }
1104 
1105 #endif	/* IPSEC_OFFLOAD */
1106