xref: /freebsd/sys/netipsec/ipsec_offload.c (revision e9ac41698b2f322d55ccf9da50a3596edb2c1800)
1 /*-
2  * Copyright (c) 2021,2022 NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23  * SUCH DAMAGE.
24  */
25 
26 #include "opt_inet.h"
27 #include "opt_inet6.h"
28 #include "opt_ipsec.h"
29 
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/ck.h>
33 #include <sys/kernel.h>
34 #include <sys/mbuf.h>
35 #include <sys/pctrie.h>
36 #include <sys/proc.h>
37 #include <sys/socket.h>
38 #include <sys/protosw.h>
39 #include <sys/taskqueue.h>
40 
41 #include <net/if.h>
42 #include <net/if_var.h>
43 #include <net/vnet.h>
44 #include <netinet/in.h>
45 #include <netinet/ip.h>
46 #include <netinet/ip_var.h>
47 #include <netinet/ip6.h>
48 #include <netinet6/ip6_var.h>
49 #include <netinet/in_pcb.h>
50 #include <netinet/tcp_var.h>
51 
52 #include <netipsec/key.h>
53 #include <netipsec/keydb.h>
54 #include <netipsec/key_debug.h>
55 #include <netipsec/xform.h>
56 #include <netipsec/ipsec.h>
57 #include <netipsec/ipsec_offload.h>
58 #include <netipsec/ah_var.h>
59 #include <netipsec/esp.h>
60 #include <netipsec/esp_var.h>
61 #include <netipsec/ipcomp_var.h>
62 
63 #ifdef IPSEC_OFFLOAD
64 
/*
 * Mutex taken around updates of the SA/SP handle lists and the drv_spi
 * trie in this file; it is also the mutex handed to new_unrhdr() for
 * drv_spi_unr.
 */
static struct mtx ipsec_accel_sav_tmp;
/* Unit-number allocator that hands out driver SPI values. */
static struct unrhdr *drv_spi_unr;
/* Held while driver-reported counters are folded into an SA. */
static struct mtx ipsec_accel_cnt_lock;
68 
/*
 * Deferred-work context used to install a new SA on capable interfaces.
 * Allocated by ipsec_accel_sa_newkey_impl() and freed by
 * ipsec_accel_sa_newkey_act().
 */
struct ipsec_accel_install_newkey_tq {
	struct secasvar *sav;		/* SA to install; holds a reference */
	struct vnet *install_vnet;	/* vnet set while the task runs */
	struct task install_task;	/* runs ipsec_accel_sa_newkey_act() */
};
74 
/*
 * Deferred-work context used to remove an SA from the interfaces.  It is
 * preallocated by ipsec_accel_alloc_forget_tq() and parked in
 * sav->accel_forget_tq until ipsec_accel_forget_sav_impl() claims it.
 */
struct ipsec_accel_forget_tq {
	struct vnet *forget_vnet;	/* vnet set while the task runs */
	struct task forget_task;	/* runs ipsec_accel_forget_sav_act() */
	struct secasvar *sav;		/* SA to forget; holds a reference */
};
80 
/*
 * Per-(SA, interface) record.  One is created by ipsec_accel_handle_sav()
 * for every interface that was asked to offload the SA, whether the
 * driver accepted it or not.
 */
struct ifp_handle_sav {
	CK_LIST_ENTRY(ifp_handle_sav) sav_link;	/* on sav->accel_ifps */
	CK_LIST_ENTRY(ifp_handle_sav) sav_allh_link; /* on the global list */
	struct secasvar *sav;		/* SA this record belongs to */
	struct ifnet *ifp;		/* interface; referenced via if_ref() */
	void *ifdata;			/* driver cookie from if_sa_newkey */
	uint64_t drv_spi;		/* key in drv_spi_pctrie */
	uint32_t flags;			/* IFP_HS_* */
	size_t hdr_ext_size;		/* esp_hdrsiz() if IFP_HS_OUTPUT is set */
	uint64_t cnt_octets;		/* last octet total reported by driver */
	uint64_t cnt_allocs;		/* last alloc total reported by driver */
};

#define	IFP_HS_HANDLED	0x00000001	/* driver accepted the SA */
#define	IFP_HS_REJECTED	0x00000002	/* SA is not offloaded on this ifp */
#define	IFP_HS_INPUT	0x00000004	/* not referenced in this file */
#define	IFP_HS_OUTPUT	0x00000008	/* record hdr_ext_size for the SA */
#define	IFP_HS_MARKER	0x00000010	/* list-walk marker, not a real handle */

/* Every ifp_handle_sav, linked through sav_allh_link. */
static CK_LIST_HEAD(, ifp_handle_sav) ipsec_accel_all_sav_handles;
101 
/*
 * Per-(security policy, interface) record, created by
 * ipsec_accel_remember_sp() before the driver's if_spdadd method is called.
 */
struct ifp_handle_sp {
	CK_LIST_ENTRY(ifp_handle_sp) sp_link;	/* on sp->accel_ifps */
	CK_LIST_ENTRY(ifp_handle_sp) sp_allh_link; /* on the global list */
	struct secpolicy *sp;		/* policy this record belongs to */
	struct ifnet *ifp;		/* interface; referenced via if_ref() */
	void *ifdata;			/* driver cookie filled by if_spdadd */
	uint32_t flags;			/* IFP_HP_* */
};

#define	IFP_HP_HANDLED	0x00000001	/* set when the record is created */
#define	IFP_HP_REJECTED	0x00000002	/* added when if_spdadd fails */
#define	IFP_HP_MARKER	0x00000004	/* list-walk marker, not a real handle */

/* Every ifp_handle_sp, linked through sp_allh_link. */
static CK_LIST_HEAD(, ifp_handle_sp) ipsec_accel_all_sp_handles;
116 
117 static void *
118 drvspi_sa_trie_alloc(struct pctrie *ptree)
119 {
120 	void *res;
121 
122 	res = malloc(pctrie_node_size(), M_IPSEC_MISC, M_ZERO | M_NOWAIT);
123 	if (res != NULL)
124 		pctrie_zone_init(res, 0, 0);
125 	return (res);
126 }
127 
128 static void
129 drvspi_sa_trie_free(struct pctrie *ptree, void *node)
130 {
131 	free(node, M_IPSEC_MISC);
132 }
133 
/*
 * Generate the DRVSPI_SA_PCTRIE_*() accessors for a trie of
 * struct ifp_handle_sav keyed by the drv_spi member.
 */
PCTRIE_DEFINE(DRVSPI_SA, ifp_handle_sav, drv_spi,
    drvspi_sa_trie_alloc, drvspi_sa_trie_free);
/* Maps a driver SPI to its ifp_handle_sav. */
static struct pctrie drv_spi_pctrie;
137 
/* Forward declarations of file-local functions used before definition. */
static void ipsec_accel_sa_newkey_impl(struct secasvar *sav);
static int ipsec_accel_handle_sav(struct secasvar *sav, struct ifnet *ifp,
    u_int drv_spi, void *priv, uint32_t flags, struct ifp_handle_sav **ires);
static void ipsec_accel_forget_sav_clear(struct secasvar *sav);
static struct ifp_handle_sav *ipsec_accel_is_accel_sav_ptr(struct secasvar *sav,
    struct ifnet *ifp);
static int ipsec_accel_sa_lifetime_op_impl(struct secasvar *sav,
    struct seclifetime *lft_c, if_t ifp, enum IF_SA_CNT_WHICH op,
    struct rm_priotracker *sahtree_trackerp);
static void ipsec_accel_sa_recordxfer(struct secasvar *sav, struct mbuf *m);
static void ipsec_accel_sync_imp(void);
static bool ipsec_accel_is_accel_sav_impl(struct secasvar *sav);
static struct mbuf *ipsec_accel_key_setaccelif_impl(struct secasvar *sav);
151 
/*
 * SYSINIT handler: set up the locks, the drv_spi allocator and trie, and
 * publish this file's implementations through the ipsec_accel_*_p hooks.
 * The argument is unused.
 */
static void
ipsec_accel_init(void *arg)
{
	mtx_init(&ipsec_accel_sav_tmp, "ipasat", MTX_DEF, 0);
	mtx_init(&ipsec_accel_cnt_lock, "ipascn", MTX_DEF, 0);
	/* The allocator uses ipsec_accel_sav_tmp as its mutex. */
	drv_spi_unr = new_unrhdr(IPSEC_ACCEL_DRV_SPI_MIN,
	    IPSEC_ACCEL_DRV_SPI_MAX, &ipsec_accel_sav_tmp);
	ipsec_accel_sa_newkey_p = ipsec_accel_sa_newkey_impl;
	ipsec_accel_forget_sav_p = ipsec_accel_forget_sav_impl;
	ipsec_accel_spdadd_p = ipsec_accel_spdadd_impl;
	ipsec_accel_spddel_p = ipsec_accel_spddel_impl;
	ipsec_accel_sa_lifetime_op_p = ipsec_accel_sa_lifetime_op_impl;
	ipsec_accel_sync_p = ipsec_accel_sync_imp;
	ipsec_accel_is_accel_sav_p = ipsec_accel_is_accel_sav_impl;
	ipsec_accel_key_setaccelif_p = ipsec_accel_key_setaccelif_impl;
	pctrie_init(&drv_spi_pctrie);
}
SYSINIT(ipsec_accel_init, SI_SUB_VNET_DONE, SI_ORDER_ANY,
    ipsec_accel_init, NULL);
171 
/*
 * SYSUNINIT handler: unpublish the hooks, drain queued tasks, then tear
 * down the drv_spi allocator and the locks.  The argument is unused.
 */
static void
ipsec_accel_fini(void *arg)
{
	/* Clear the hooks first so no new work is routed to this file. */
	ipsec_accel_sa_newkey_p = NULL;
	ipsec_accel_forget_sav_p = NULL;
	ipsec_accel_spdadd_p = NULL;
	ipsec_accel_spddel_p = NULL;
	ipsec_accel_sa_lifetime_op_p = NULL;
	ipsec_accel_sync_p = NULL;
	ipsec_accel_is_accel_sav_p = NULL;
	ipsec_accel_key_setaccelif_p = NULL;
	/* Wait for tasks already queued on taskqueue_thread. */
	ipsec_accel_sync_imp();
	clean_unrhdr(drv_spi_unr);	/* avoid panic, should go later */
	clear_unrhdr(drv_spi_unr);
	delete_unrhdr(drv_spi_unr);
	mtx_destroy(&ipsec_accel_sav_tmp);
	mtx_destroy(&ipsec_accel_cnt_lock);
}
SYSUNINIT(ipsec_accel_fini, SI_SUB_VNET_DONE, SI_ORDER_ANY,
    ipsec_accel_fini, NULL);
192 
193 static void
194 ipsec_accel_alloc_forget_tq(struct secasvar *sav)
195 {
196 	void *ftq;
197 
198 	if (sav->accel_forget_tq != 0)
199 		return;
200 
201 	ftq = malloc(sizeof(struct ipsec_accel_forget_tq), M_TEMP, M_WAITOK);
202 	if (!atomic_cmpset_ptr(&sav->accel_forget_tq, 0, (uintptr_t)ftq))
203 		free(ftq, M_TEMP);
204 }
205 
206 static bool
207 ipsec_accel_sa_install_match(if_t ifp, void *arg)
208 {
209 	if ((ifp->if_capenable2 & IFCAP2_BIT(IFCAP2_IPSEC_OFFLOAD)) == 0)
210 		return (false);
211 	if (ifp->if_ipsec_accel_m->if_sa_newkey == NULL) {
212 		printf("driver bug ifp %s if_sa_newkey NULL\n",
213 		    if_name(ifp));
214 		return (false);
215 	}
216 	return (true);
217 }
218 
219 static int
220 ipsec_accel_sa_newkey_cb(if_t ifp, void *arg)
221 {
222 	struct ipsec_accel_install_newkey_tq *tq;
223 	void *priv;
224 	u_int drv_spi;
225 	int error;
226 
227 	tq = arg;
228 
229 	printf("ipsec_accel_sa_newkey_act: ifp %s h %p spi %#x "
230 	    "flags %#x seq %d\n",
231 	    if_name(ifp), ifp->if_ipsec_accel_m->if_sa_newkey,
232 	    be32toh(tq->sav->spi), tq->sav->flags, tq->sav->seq);
233 	priv = NULL;
234 	drv_spi = alloc_unr(drv_spi_unr);
235 	if (tq->sav->accel_ifname != NULL &&
236 	    strcmp(tq->sav->accel_ifname, if_name(ifp)) != 0) {
237 		error = ipsec_accel_handle_sav(tq->sav,
238 		    ifp, drv_spi, priv, IFP_HS_REJECTED, NULL);
239 		goto out;
240 	}
241 	if (drv_spi == -1) {
242 		/* XXXKIB */
243 		printf("ipsec_accel_sa_install_newkey: cannot alloc "
244 		    "drv_spi if %s spi %#x\n", if_name(ifp),
245 		    be32toh(tq->sav->spi));
246 		return (ENOMEM);
247 	}
248 	error = ifp->if_ipsec_accel_m->if_sa_newkey(ifp, tq->sav,
249 	    drv_spi, &priv);
250 	if (error != 0) {
251 		if (error == EOPNOTSUPP) {
252 			printf("ipsec_accel_sa_newkey: driver "
253 			    "refused sa if %s spi %#x\n",
254 			    if_name(ifp), be32toh(tq->sav->spi));
255 			error = ipsec_accel_handle_sav(tq->sav,
256 			    ifp, drv_spi, priv, IFP_HS_REJECTED, NULL);
257 			/* XXXKIB */
258 		} else {
259 			printf("ipsec_accel_sa_newkey: driver "
260 			    "error %d if %s spi %#x\n",
261 			    error, if_name(ifp), be32toh(tq->sav->spi));
262 			/* XXXKIB */
263 		}
264 	} else {
265 		error = ipsec_accel_handle_sav(tq->sav, ifp,
266 		    drv_spi, priv, IFP_HS_HANDLED, NULL);
267 		if (error != 0) {
268 			/* XXXKIB */
269 			printf("ipsec_accel_sa_newkey: handle_sav "
270 			    "err %d if %s spi %#x\n", error,
271 			    if_name(ifp), be32toh(tq->sav->spi));
272 		}
273 	}
274 out:
275 	return (error);
276 }
277 
/*
 * Taskqueue handler that offers an SA to every matching interface.
 *
 * context is the struct ipsec_accel_install_newkey_tq queued by
 * ipsec_accel_sa_newkey_impl(); both it and the SA reference it carries
 * are released here.  pending is unused.
 */
static void
ipsec_accel_sa_newkey_act(void *context, int pending)
{
	struct ipsec_accel_install_newkey_tq *tq;
	void *tqf;
	struct secasvar *sav;

	tq = context;
	tqf = NULL;
	sav = tq->sav;
	CURVNET_SET(tq->install_vnet);
	mtx_lock(&ipsec_accel_sav_tmp);
	/* Install only once, and only for an SA that is still mature. */
	if ((sav->accel_flags & (SADB_KEY_ACCEL_INST |
	    SADB_KEY_ACCEL_DEINST)) == 0 &&
	    sav->state == SADB_SASTATE_MATURE) {
		sav->accel_flags |= SADB_KEY_ACCEL_INST;
		/* The interface walk runs without the mutex held. */
		mtx_unlock(&ipsec_accel_sav_tmp);
		if_foreach_sleep(ipsec_accel_sa_install_match, context,
		    ipsec_accel_sa_newkey_cb, context);
		ipsec_accel_alloc_forget_tq(sav);
		mtx_lock(&ipsec_accel_sav_tmp);

		/*
		 * If ipsec_accel_forget_sav() raced with us and set
		 * the flag, do its work.  Its task cannot execute in
		 * parallel since taskqueue_thread is single-threaded.
		 */
		if ((sav->accel_flags & SADB_KEY_ACCEL_DEINST) != 0) {
			/* Take over the preallocated forget task to free it. */
			tqf = (void *)sav->accel_forget_tq;
			sav->accel_forget_tq = 0;
			ipsec_accel_forget_sav_clear(sav);
		}
	}
	mtx_unlock(&ipsec_accel_sav_tmp);
	/* Drop the reference taken when the task was queued. */
	key_freesav(&tq->sav);
	CURVNET_RESTORE();
	free(tq, M_TEMP);
	free(tqf, M_TEMP);
}
317 
318 static void
319 ipsec_accel_sa_newkey_impl(struct secasvar *sav)
320 {
321 	struct ipsec_accel_install_newkey_tq *tq;
322 
323 	if ((sav->accel_flags & (SADB_KEY_ACCEL_INST |
324 	    SADB_KEY_ACCEL_DEINST)) != 0)
325 		return;
326 
327 	printf(
328 	    "ipsec_accel_sa_install_newkey: spi %#x flags %#x seq %d\n",
329 	    be32toh(sav->spi), sav->flags, sav->seq);
330 
331 	tq = malloc(sizeof(*tq), M_TEMP, M_NOWAIT);
332 	if (tq == NULL) {
333 		printf("ipsec_accel_sa_install_newkey: no memory for tq, "
334 		    "spi %#x\n", be32toh(sav->spi));
335 		/* XXXKIB */
336 		return;
337 	}
338 
339 	refcount_acquire(&sav->refcnt);
340 
341 	TASK_INIT(&tq->install_task, 0, ipsec_accel_sa_newkey_act, tq);
342 	tq->sav = sav;
343 	tq->install_vnet = curthread->td_vnet;	/* XXXKIB liveness */
344 	taskqueue_enqueue(taskqueue_thread, &tq->install_task);
345 }
346 
/*
 * Record the outcome of offering an SA to an interface.
 *
 * Allocates an ifp_handle_sav, inserts it into the drv_spi trie, and links
 * it on both the SA's list and the global list.  flags must contain
 * exactly one of IFP_HS_HANDLED and IFP_HS_REJECTED.  On success the
 * handle holds a reference on ifp.
 *
 * Returns 0 on success, EALREADY if the SA already has a handle for ifp,
 * or the error from the trie insertion.  When ires is not NULL it receives
 * the new handle, or NULL on failure.
 */
static int
ipsec_accel_handle_sav(struct secasvar *sav, struct ifnet *ifp,
    u_int drv_spi, void *priv, uint32_t flags, struct ifp_handle_sav **ires)
{
	struct ifp_handle_sav *ihs, *i;
	int error;

	MPASS(__bitcount(flags & (IFP_HS_HANDLED | IFP_HS_REJECTED)) == 1);

	/* Allocate before taking the mutex; M_WAITOK may sleep. */
	ihs = malloc(sizeof(*ihs), M_IPSEC_MISC, M_WAITOK | M_ZERO);
	ihs->ifp = ifp;
	ihs->sav = sav;
	ihs->drv_spi = drv_spi;
	ihs->ifdata = priv;
	ihs->flags = flags;
	if ((flags & IFP_HS_OUTPUT) != 0)
		ihs->hdr_ext_size = esp_hdrsiz(sav);
	mtx_lock(&ipsec_accel_sav_tmp);
	/* At most one handle per (SA, interface) pair. */
	CK_LIST_FOREACH(i, &sav->accel_ifps, sav_link) {
		if (i->ifp == ifp) {
			error = EALREADY;
			goto errout;
		}
	}
	error = DRVSPI_SA_PCTRIE_INSERT(&drv_spi_pctrie, ihs);
	if (error != 0)
		goto errout;
	if_ref(ihs->ifp);
	CK_LIST_INSERT_HEAD(&sav->accel_ifps, ihs, sav_link);
	CK_LIST_INSERT_HEAD(&ipsec_accel_all_sav_handles, ihs, sav_allh_link);
	mtx_unlock(&ipsec_accel_sav_tmp);
	if (ires != NULL)
		*ires = ihs;
	return (0);
errout:
	/* The handle was never published, so it can be freed directly. */
	mtx_unlock(&ipsec_accel_sav_tmp);
	free(ihs, M_IPSEC_MISC);
	if (ires != NULL)
		*ires = NULL;
	return (error);
}
388 
/*
 * Unlink one per-interface SA handle and release everything it owns.
 *
 * Must be called with ipsec_accel_sav_tmp held.  The mutex is dropped
 * while waiting for the net epoch and calling into the driver, and is
 * held again on return, so callers must not assume list state is
 * unchanged across the call.  When freesav is true, one reference on the
 * SA is dropped as well.
 */
static void
ipsec_accel_forget_handle_sav(struct ifp_handle_sav *i, bool freesav)
{
	struct ifnet *ifp;
	struct secasvar *sav;

	mtx_assert(&ipsec_accel_sav_tmp, MA_OWNED);

	CK_LIST_REMOVE(i, sav_link);
	CK_LIST_REMOVE(i, sav_allh_link);
	DRVSPI_SA_PCTRIE_REMOVE(&drv_spi_pctrie, i->drv_spi);
	mtx_unlock(&ipsec_accel_sav_tmp);
	/* Let readers inside the net epoch finish with the handle. */
	NET_EPOCH_WAIT();
	ifp = i->ifp;
	sav = i->sav;
	/* Only SAs the driver accepted need to be deinstalled. */
	if ((i->flags & (IFP_HS_HANDLED | IFP_HS_REJECTED)) ==
	    IFP_HS_HANDLED) {
		printf("sa deinstall %s %p spi %#x ifl %#x\n",
		    if_name(ifp), sav, be32toh(sav->spi), i->flags);
		ifp->if_ipsec_accel_m->if_sa_deinstall(ifp,
		    i->drv_spi, i->ifdata);
	}
	if_rele(ifp);
	free_unr(drv_spi_unr, i->drv_spi);
	free(i, M_IPSEC_MISC);
	if (freesav)
		key_freesav(&sav);
	mtx_lock(&ipsec_accel_sav_tmp);
}
418 
419 static void
420 ipsec_accel_forget_sav_clear(struct secasvar *sav)
421 {
422 	struct ifp_handle_sav *i;
423 
424 	for (;;) {
425 		i = CK_LIST_FIRST(&sav->accel_ifps);
426 		if (i == NULL)
427 			break;
428 		ipsec_accel_forget_handle_sav(i, false);
429 	}
430 }
431 
432 static void
433 ipsec_accel_forget_sav_act(void *arg, int pending)
434 {
435 	struct ipsec_accel_forget_tq *tq;
436 	struct secasvar *sav;
437 
438 	tq = arg;
439 	sav = tq->sav;
440 	CURVNET_SET(tq->forget_vnet);
441 	mtx_lock(&ipsec_accel_sav_tmp);
442 	ipsec_accel_forget_sav_clear(sav);
443 	mtx_unlock(&ipsec_accel_sav_tmp);
444 	key_freesav(&sav);
445 	CURVNET_RESTORE();
446 	free(tq, M_TEMP);
447 }
448 
/*
 * Request removal of an SA from all interfaces.
 *
 * Marks the SA with SADB_KEY_ACCEL_DEINST and, if the preallocated forget
 * task structure can be claimed from sav->accel_forget_tq, queues
 * ipsec_accel_forget_sav_act() with a new reference on the SA.  If no
 * structure is available, only the flag is set and the function returns.
 */
void
ipsec_accel_forget_sav_impl(struct secasvar *sav)
{
	struct ipsec_accel_forget_tq *tq;

	mtx_lock(&ipsec_accel_sav_tmp);
	sav->accel_flags |= SADB_KEY_ACCEL_DEINST;
	/* Claim the preallocated structure; only one caller can win. */
	tq = (void *)atomic_load_ptr(&sav->accel_forget_tq);
	if (tq == NULL || !atomic_cmpset_ptr(&sav->accel_forget_tq,
	    (uintptr_t)tq, 0)) {
		mtx_unlock(&ipsec_accel_sav_tmp);
		return;
	}
	mtx_unlock(&ipsec_accel_sav_tmp);

	/* Reference is dropped by ipsec_accel_forget_sav_act(). */
	refcount_acquire(&sav->refcnt);
	TASK_INIT(&tq->forget_task, 0, ipsec_accel_forget_sav_act, tq);
	tq->forget_vnet = curthread->td_vnet;
	tq->sav = sav;
	taskqueue_enqueue(taskqueue_thread, &tq->forget_task);
}
470 
/*
 * Remove every SA handle that belongs to ifp.
 *
 * The global handle list is walked with a marker element that is moved
 * past each visited entry, so the walk can resume from a known position
 * after ipsec_accel_forget_handle_sav() has dropped and re-taken the
 * mutex.
 */
static void
ipsec_accel_on_ifdown_sav(struct ifnet *ifp)
{
	struct ifp_handle_sav *i, *marker;

	marker = malloc(sizeof(*marker), M_IPSEC_MISC, M_WAITOK | M_ZERO);
	marker->flags = IFP_HS_MARKER;

	mtx_lock(&ipsec_accel_sav_tmp);
	CK_LIST_INSERT_HEAD(&ipsec_accel_all_sav_handles, marker,
	    sav_allh_link);
	for (;;) {
		i = CK_LIST_NEXT(marker, sav_allh_link);
		if (i == NULL)
			break;
		/* Step the marker past i before i may be freed. */
		CK_LIST_REMOVE(marker, sav_allh_link);
		CK_LIST_INSERT_AFTER(i, marker, sav_allh_link);
		if (i->ifp == ifp) {
			/* Extra reference is dropped by the callee (freesav). */
			refcount_acquire(&i->sav->refcnt); /* XXXKIB wrap ? */
			ipsec_accel_forget_handle_sav(i, true);
		}
	}
	CK_LIST_REMOVE(marker, sav_allh_link);
	mtx_unlock(&ipsec_accel_sav_tmp);
	free(marker, M_IPSEC_MISC);
}
497 
498 static struct ifp_handle_sav *
499 ipsec_accel_is_accel_sav_ptr_raw(struct secasvar *sav, struct ifnet *ifp)
500 {
501 	struct ifp_handle_sav *i;
502 
503 	if ((ifp->if_capenable2 & IFCAP2_BIT(IFCAP2_IPSEC_OFFLOAD)) == 0)
504 		return (NULL);
505 	CK_LIST_FOREACH(i, &sav->accel_ifps, sav_link) {
506 		if (i->ifp == ifp)
507 			return (i);
508 	}
509 	return (NULL);
510 }
511 
/*
 * Same lookup as ipsec_accel_is_accel_sav_ptr_raw(), with an assertion
 * that the caller is inside the net epoch.
 */
static struct ifp_handle_sav *
ipsec_accel_is_accel_sav_ptr(struct secasvar *sav, struct ifnet *ifp)
{
	struct ifp_handle_sav *h;

	NET_EPOCH_ASSERT();
	h = ipsec_accel_is_accel_sav_ptr_raw(sav, ifp);
	return (h);
}
518 
519 static bool
520 ipsec_accel_is_accel_sav_impl(struct secasvar *sav)
521 {
522 	return (!CK_LIST_EMPTY(&sav->accel_ifps));
523 }
524 
525 static struct secasvar *
526 ipsec_accel_drvspi_to_sa(u_int drv_spi)
527 {
528 	struct ifp_handle_sav *i;
529 
530 	i = DRVSPI_SA_PCTRIE_LOOKUP(&drv_spi_pctrie, drv_spi);
531 	if (i == NULL)
532 		return (NULL);
533 	return (i->sav);
534 }
535 
536 static struct ifp_handle_sp *
537 ipsec_accel_find_accel_sp(struct secpolicy *sp, if_t ifp)
538 {
539 	struct ifp_handle_sp *i;
540 
541 	CK_LIST_FOREACH(i, &sp->accel_ifps, sp_link) {
542 		if (i->ifp == ifp)
543 			return (i);
544 	}
545 	return (NULL);
546 }
547 
548 static bool
549 ipsec_accel_is_accel_sp(struct secpolicy *sp, if_t ifp)
550 {
551 	return (ipsec_accel_find_accel_sp(sp, ifp) != NULL);
552 }
553 
554 static int
555 ipsec_accel_remember_sp(struct secpolicy *sp, if_t ifp,
556     struct ifp_handle_sp **ip)
557 {
558 	struct ifp_handle_sp *i;
559 
560 	i = malloc(sizeof(*i), M_IPSEC_MISC, M_WAITOK | M_ZERO);
561 	i->sp = sp;
562 	i->ifp = ifp;
563 	if_ref(ifp);
564 	i->flags = IFP_HP_HANDLED;
565 	mtx_lock(&ipsec_accel_sav_tmp);
566 	CK_LIST_INSERT_HEAD(&sp->accel_ifps, i, sp_link);
567 	CK_LIST_INSERT_HEAD(&ipsec_accel_all_sp_handles, i, sp_allh_link);
568 	mtx_unlock(&ipsec_accel_sav_tmp);
569 	*ip = i;
570 	return (0);
571 }
572 
573 static bool
574 ipsec_accel_spdadd_match(if_t ifp, void *arg)
575 {
576 	struct secpolicy *sp;
577 
578 	if ((ifp->if_capenable2 & IFCAP2_BIT(IFCAP2_IPSEC_OFFLOAD)) == 0 ||
579 	    ifp->if_ipsec_accel_m->if_spdadd == NULL)
580 		return (false);
581 	sp = arg;
582 	if (sp->accel_ifname != NULL &&
583 	    strcmp(sp->accel_ifname, if_name(ifp)) != 0)
584 		return (false);
585 	if (ipsec_accel_is_accel_sp(sp, ifp))
586 		return (false);
587 	return (true);
588 }
589 
590 static int
591 ipsec_accel_spdadd_cb(if_t ifp, void *arg)
592 {
593 	struct secpolicy *sp;
594 	struct inpcb *inp;
595 	struct ifp_handle_sp *i;
596 	int error;
597 
598 	sp = arg;
599 	inp = sp->ipsec_accel_add_sp_inp;
600 	printf("ipsec_accel_spdadd_cb: ifp %s m %p sp %p inp %p\n",
601 	    if_name(ifp), ifp->if_ipsec_accel_m->if_spdadd, sp, inp);
602 	error = ipsec_accel_remember_sp(sp, ifp, &i);
603 	if (error != 0) {
604 		printf("ipsec_accel_spdadd: %s if_spdadd %p remember res %d\n",
605 		    if_name(ifp), sp, error);
606 		return (error);
607 	}
608 	error = ifp->if_ipsec_accel_m->if_spdadd(ifp, sp, inp, &i->ifdata);
609 	if (error != 0) {
610 		i->flags |= IFP_HP_REJECTED;
611 		printf("ipsec_accel_spdadd: %s if_spdadd %p res %d\n",
612 		    if_name(ifp), sp, error);
613 	}
614 	return (error);
615 }
616 
617 static void
618 ipsec_accel_spdadd_act(void *arg, int pending)
619 {
620 	struct secpolicy *sp;
621 	struct inpcb *inp;
622 
623 	sp = arg;
624 	CURVNET_SET(sp->accel_add_tq.adddel_vnet);
625 	if_foreach_sleep(ipsec_accel_spdadd_match, arg,
626 	    ipsec_accel_spdadd_cb, arg);
627 	inp = sp->ipsec_accel_add_sp_inp;
628 	if (inp != NULL) {
629 		INP_WLOCK(inp);
630 		if (!in_pcbrele_wlocked(inp))
631 			INP_WUNLOCK(inp);
632 		sp->ipsec_accel_add_sp_inp = NULL;
633 	}
634 	CURVNET_RESTORE();
635 	key_freesp(&sp);
636 }
637 
638 void
639 ipsec_accel_spdadd_impl(struct secpolicy *sp, struct inpcb *inp)
640 {
641 	struct ipsec_accel_adddel_sp_tq *tq;
642 
643 	if (sp == NULL)
644 		return;
645 	if (sp->tcount == 0 && inp == NULL)
646 		return;
647 	tq = &sp->accel_add_tq;
648 	if (atomic_cmpset_int(&tq->adddel_scheduled, 0, 1) == 0)
649 		return;
650 	tq->adddel_vnet = curthread->td_vnet;
651 	sp->ipsec_accel_add_sp_inp = inp;
652 	if (inp != NULL)
653 		in_pcbref(inp);
654 	TASK_INIT(&tq->adddel_task, 0, ipsec_accel_spdadd_act, sp);
655 	key_addref(sp);
656 	taskqueue_enqueue(taskqueue_thread, &tq->adddel_task);
657 }
658 
/*
 * Taskqueue handler that removes a policy from every interface it has a
 * handle on.  arg is the struct secpolicy; the reference taken by
 * ipsec_accel_spddel_impl() is dropped here.  pending is unused.
 */
static void
ipsec_accel_spddel_act(void *arg, int pending)
{
	struct ifp_handle_sp *i;
	struct secpolicy *sp;
	int error;

	sp = arg;
	CURVNET_SET(sp->accel_del_tq.adddel_vnet);
	mtx_lock(&ipsec_accel_sav_tmp);
	for (;;) {
		/* Re-read the head: the mutex is dropped inside the loop. */
		i = CK_LIST_FIRST(&sp->accel_ifps);
		if (i == NULL)
			break;
		CK_LIST_REMOVE(i, sp_link);
		CK_LIST_REMOVE(i, sp_allh_link);
		mtx_unlock(&ipsec_accel_sav_tmp);
		/* Let readers inside the net epoch finish with the handle. */
		NET_EPOCH_WAIT();
		/* Only policies the driver accepted are deinstalled. */
		if ((i->flags & (IFP_HP_HANDLED | IFP_HP_REJECTED)) ==
		    IFP_HP_HANDLED) {
			printf("spd deinstall %s %p\n", if_name(i->ifp), sp);
			error = i->ifp->if_ipsec_accel_m->if_spddel(i->ifp,
			    sp, i->ifdata);
			if (error != 0) {
				printf(
		    "ipsec_accel_spddel: %s if_spddel %p res %d\n",
				    if_name(i->ifp), sp, error);
			}
		}
		if_rele(i->ifp);
		free(i, M_IPSEC_MISC);
		mtx_lock(&ipsec_accel_sav_tmp);
	}
	mtx_unlock(&ipsec_accel_sav_tmp);
	key_freesp(&sp);
	CURVNET_RESTORE();
}
696 
697 void
698 ipsec_accel_spddel_impl(struct secpolicy *sp)
699 {
700 	struct ipsec_accel_adddel_sp_tq *tq;
701 
702 	if (sp == NULL)
703 		return;
704 
705 	tq = &sp->accel_del_tq;
706 	if (atomic_cmpset_int(&tq->adddel_scheduled, 0, 1) == 0)
707 		return;
708 	tq->adddel_vnet = curthread->td_vnet;
709 	TASK_INIT(&tq->adddel_task, 0, ipsec_accel_spddel_act, sp);
710 	key_addref(sp);
711 	taskqueue_enqueue(taskqueue_thread, &tq->adddel_task);
712 }
713 
714 static void
715 ipsec_accel_on_ifdown_sp(struct ifnet *ifp)
716 {
717 	struct ifp_handle_sp *i, *marker;
718 	struct secpolicy *sp;
719 	int error;
720 
721 	marker = malloc(sizeof(*marker), M_IPSEC_MISC, M_WAITOK | M_ZERO);
722 	marker->flags = IFP_HS_MARKER;
723 
724 	mtx_lock(&ipsec_accel_sav_tmp);
725 	CK_LIST_INSERT_HEAD(&ipsec_accel_all_sp_handles, marker,
726 	    sp_allh_link);
727 	for (;;) {
728 		i = CK_LIST_NEXT(marker, sp_allh_link);
729 		if (i == NULL)
730 			break;
731 		CK_LIST_REMOVE(marker, sp_allh_link);
732 		CK_LIST_INSERT_AFTER(i, marker, sp_allh_link);
733 		if (i->ifp != ifp)
734 			continue;
735 
736 		sp = i->sp;
737 		key_addref(sp);
738 		CK_LIST_REMOVE(i, sp_link);
739 		CK_LIST_REMOVE(i, sp_allh_link);
740 		mtx_unlock(&ipsec_accel_sav_tmp);
741 		NET_EPOCH_WAIT();
742 		if ((i->flags & (IFP_HP_HANDLED | IFP_HP_REJECTED)) ==
743 		    IFP_HP_HANDLED) {
744 			printf("spd deinstall %s %p\n", if_name(ifp), sp);
745 			error = ifp->if_ipsec_accel_m->if_spddel(ifp,
746 			    sp, i->ifdata);
747 		}
748 		if (error != 0) {
749 			printf(
750 		    "ipsec_accel_on_ifdown_sp: %s if_spddel %p res %d\n",
751 			    if_name(ifp), sp, error);
752 		}
753 		key_freesp(&sp);
754 		if_rele(ifp);
755 		free(i, M_IPSEC_MISC);
756 		mtx_lock(&ipsec_accel_sav_tmp);
757 	}
758 	CK_LIST_REMOVE(marker, sp_allh_link);
759 	mtx_unlock(&ipsec_accel_sav_tmp);
760 	free(marker, M_IPSEC_MISC);
761 }
762 
/*
 * Drop all offload state tied to ifp: first the policy handles, then the
 * SA handles.
 */
void
ipsec_accel_on_ifdown(struct ifnet *ifp)
{

	ipsec_accel_on_ifdown_sp(ifp);
	ipsec_accel_on_ifdown_sav(ifp);
}
769 
770 static bool
771 ipsec_accel_output_pad(struct mbuf *m, struct secasvar *sav, int skip, int mtu)
772 {
773 	int alen, blks, hlen, padding, rlen;
774 
775 	rlen = m->m_pkthdr.len - skip;
776 	hlen = ((sav->flags & SADB_X_EXT_OLD) != 0 ? sizeof(struct esp) :
777 	    sizeof(struct newesp)) + sav->ivlen;
778 	blks = MAX(4, SAV_ISCTR(sav) && VNET(esp_ctr_compatibility) ?
779 	    sav->tdb_encalgxform->native_blocksize :
780 	    sav->tdb_encalgxform->blocksize);
781 	padding = ((blks - ((rlen + 2) % blks)) % blks) + 2;
782 	alen = xform_ah_authsize(sav->tdb_authalgxform);
783 
784 	return (skip + hlen + rlen + padding + alen <= mtu);
785 }
786 
787 static bool
788 ipsec_accel_output_tag(struct mbuf *m, u_int drv_spi)
789 {
790 	struct ipsec_accel_out_tag *tag;
791 
792 	tag = (struct ipsec_accel_out_tag *)m_tag_get(
793 	    PACKET_TAG_IPSEC_ACCEL_OUT, sizeof(*tag), M_NOWAIT);
794 	if (tag == NULL)
795 		return (false);
796 	tag->drv_spi = drv_spi;
797 	m_tag_prepend(m, &tag->tag);
798 	return (true);
799 }
800 
801 bool
802 ipsec_accel_output(struct ifnet *ifp, struct mbuf *m, struct inpcb *inp,
803     struct secpolicy *sp, struct secasvar *sav, int af, int mtu, int *hwassist)
804 {
805 	struct ifp_handle_sav *i;
806 	struct ip *ip;
807 	struct tcpcb *tp;
808 	u_long ip_len, skip;
809 	bool res;
810 
811 	*hwassist = 0;
812 	res = false;
813 	if (ifp == NULL)
814 		return (res);
815 
816 	M_ASSERTPKTHDR(m);
817 	NET_EPOCH_ASSERT();
818 
819 	if (sav == NULL) {
820 		res = ipsec_accel_output_tag(m, IPSEC_ACCEL_DRV_SPI_BYPASS);
821 		goto out;
822 	}
823 
824 	i = ipsec_accel_is_accel_sav_ptr(sav, ifp);
825 	if (i == NULL)
826 		goto out;
827 
828 	if ((m->m_pkthdr.csum_flags & CSUM_TSO) == 0) {
829 		ip_len = m->m_pkthdr.len;
830 		if (ip_len + i->hdr_ext_size > mtu)
831 			goto out;
832 		switch (af) {
833 		case AF_INET:
834 			ip = mtod(m, struct ip *);
835 			skip = ip->ip_hl << 2;
836 			break;
837 		case AF_INET6:
838 			skip = sizeof(struct ip6_hdr);
839 			break;
840 		default:
841 			__unreachable();
842 		}
843 		if (!ipsec_accel_output_pad(m, sav, skip, mtu))
844 			goto out;
845 	}
846 
847 	if (!ipsec_accel_output_tag(m, i->drv_spi))
848 		goto out;
849 
850 	ipsec_accel_sa_recordxfer(sav, m);
851 	key_freesav(&sav);
852 	if (sp != NULL)
853 		key_freesp(&sp);
854 
855 	*hwassist = ifp->if_ipsec_accel_m->if_hwassist(ifp, sav,
856 	    i->drv_spi, i->ifdata);
857 	res = true;
858 out:
859 	if (inp != NULL && inp->inp_pcbinfo == &V_tcbinfo) {
860 		INP_WLOCK_ASSERT(inp);
861 		tp = (struct tcpcb *)inp;
862 		if (res && (*hwassist & (CSUM_TSO | CSUM_IP6_TSO)) != 0) {
863 			tp->t_flags2 |= TF2_IPSEC_TSO;
864 		} else {
865 			tp->t_flags2 &= ~TF2_IPSEC_TSO;
866 		}
867 	}
868 	return (res);
869 }
870 
871 struct ipsec_accel_in_tag *
872 ipsec_accel_input_tag_lookup(const struct mbuf *m)
873 {
874 	struct ipsec_accel_in_tag *tag;
875 	struct m_tag *xtag;
876 
877 	xtag = m_tag_find(__DECONST(struct mbuf *, m),
878 	    PACKET_TAG_IPSEC_ACCEL_IN, NULL);
879 	if (xtag == NULL)
880 		return (NULL);
881 	tag = __containerof(xtag, struct ipsec_accel_in_tag, tag);
882 	return (tag);
883 }
884 
885 int
886 ipsec_accel_input(struct mbuf *m, int offset, int proto)
887 {
888 	struct secasvar *sav;
889 	struct ipsec_accel_in_tag *tag;
890 
891 	tag = ipsec_accel_input_tag_lookup(m);
892 	if (tag == NULL)
893 		return (ENXIO);
894 
895 	if (tag->drv_spi < IPSEC_ACCEL_DRV_SPI_MIN ||
896 	    tag->drv_spi > IPSEC_ACCEL_DRV_SPI_MAX) {
897 		printf("if %s mbuf %p drv_spi %d invalid, packet dropped\n",
898 		    (m->m_flags & M_PKTHDR) != 0 ? if_name(m->m_pkthdr.rcvif) :
899 		    "<unknwn>", m, tag->drv_spi);
900 		m_freem(m);
901 		return (EINPROGRESS);
902 	}
903 
904 	sav = ipsec_accel_drvspi_to_sa(tag->drv_spi);
905 	if (sav != NULL)
906 		ipsec_accel_sa_recordxfer(sav, m);
907 	return (0);
908 }
909 
910 static void
911 ipsec_accel_sa_recordxfer(struct secasvar *sav, struct mbuf *m)
912 {
913 	counter_u64_add(sav->accel_lft_sw, 1);
914 	counter_u64_add(sav->accel_lft_sw + 1, m->m_pkthdr.len);
915 	if (sav->accel_firstused == 0)
916 		sav->accel_firstused = time_second;
917 }
918 
919 static void
920 ipsec_accel_sa_lifetime_update(struct seclifetime *lft_c,
921     const struct seclifetime *lft_l)
922 {
923 	lft_c->allocations += lft_l->allocations;
924 	lft_c->bytes += lft_l->bytes;
925 	lft_c->usetime = min(lft_c->usetime, lft_l->usetime);
926 }
927 
/*
 * Driver entry point for reporting hardware counters of an offloaded SA.
 *
 * octets and allocs are treated as running totals for the (ifp, drv_spi)
 * handle: the difference from the previously recorded totals is added to
 * the SA's current-lifetime counters and to its accel_hw_* accumulators.
 * The report is ignored if the SA has no handle matching both ifp and
 * drv_spi.
 */
void
ipsec_accel_drv_sa_lifetime_update(struct secasvar *sav, if_t ifp,
    u_int drv_spi, uint64_t octets, uint64_t allocs)
{
	struct epoch_tracker et;
	struct ifp_handle_sav *i;
	uint64_t odiff, adiff;

	NET_EPOCH_ENTER(et);
	mtx_lock(&ipsec_accel_cnt_lock);

	/* A non-zero allocation count means the SA has been used. */
	if (allocs != 0) {
		if (sav->firstused == 0)
			sav->firstused = time_second;
		if (sav->accel_firstused == 0)
			sav->accel_firstused = time_second;
	}

	CK_LIST_FOREACH(i, &sav->accel_ifps, sav_link) {
		if (i->ifp == ifp && i->drv_spi == drv_spi)
			break;
	}
	if (i == NULL)
		goto out;

	/* Deltas since the previous report for this handle. */
	odiff = octets - i->cnt_octets;
	adiff = allocs - i->cnt_allocs;

	if (sav->lft_c != NULL) {
		counter_u64_add(sav->lft_c_bytes, odiff);
		counter_u64_add(sav->lft_c_allocations, adiff);
	}

	i->cnt_octets = octets;
	i->cnt_allocs = allocs;
	sav->accel_hw_octets += odiff;
	sav->accel_hw_allocs += adiff;

out:
	mtx_unlock(&ipsec_accel_cnt_lock);
	NET_EPOCH_EXIT(et);
}
970 
971 static void
972 ipsec_accel_sa_lifetime_hw(struct secasvar *sav, if_t ifp,
973     struct seclifetime *lft)
974 {
975 	struct ifp_handle_sav *i;
976 	if_sa_cnt_fn_t p;
977 
978 	IFNET_RLOCK_ASSERT();
979 
980 	i = ipsec_accel_is_accel_sav_ptr(sav, ifp);
981 	if (i != NULL && (i->flags & (IFP_HS_HANDLED | IFP_HS_REJECTED)) ==
982 	    IFP_HS_HANDLED) {
983 		p = ifp->if_ipsec_accel_m->if_sa_cnt;
984 		if (p != NULL)
985 			p(ifp, sav, i->drv_spi, i->ifdata, lft);
986 	}
987 }
988 
/*
 * Collect lifetime counters of an offloaded SA into lft_c.
 *
 * op, with IF_SA_CNT_UPD masked off, selects the source:
 *   IF_SA_CNT_IFP_HW_VAL    hardware counters from the driver of ifp;
 *   IF_SA_CNT_TOTAL_SW_VAL  the SA's software counters from this file;
 *   IF_SA_CNT_TOTAL_HW_VAL  hardware counters summed over every interface
 *                           that accepted the SA.
 * If IF_SA_CNT_UPD is set the result is merged into the existing contents
 * of lft_c; otherwise lft_c is zeroed first.
 *
 * When sahtree_trackerp is not NULL, the SA hash tree read lock it tracks
 * is released around each driver call in the TOTAL_HW case and taken
 * again afterwards.
 *
 * Returns 0 on success.  In the TOTAL_HW case the value is the result of
 * the last driver call made, and lft_c is left untouched if it is
 * non-zero.
 */
static int
ipsec_accel_sa_lifetime_op_impl(struct secasvar *sav,
    struct seclifetime *lft_c, if_t ifp, enum IF_SA_CNT_WHICH op,
    struct rm_priotracker *sahtree_trackerp)
{
	struct seclifetime lft_l, lft_s;
	struct ifp_handle_sav *i;
	if_t ifp1;
	if_sa_cnt_fn_t p;
	int error;

	error = 0;
	memset(&lft_l, 0, sizeof(lft_l));
	memset(&lft_s, 0, sizeof(lft_s));

	switch (op & ~IF_SA_CNT_UPD) {
	case IF_SA_CNT_IFP_HW_VAL:
		ipsec_accel_sa_lifetime_hw(sav, ifp, &lft_l);
		ipsec_accel_sa_lifetime_update(&lft_l, &lft_s);
		break;

	case IF_SA_CNT_TOTAL_SW_VAL:
		lft_l.allocations = (uint32_t)counter_u64_fetch(
		    sav->accel_lft_sw);
		lft_l.bytes = counter_u64_fetch(sav->accel_lft_sw + 1);
		lft_l.usetime = sav->accel_firstused;
		break;

	case IF_SA_CNT_TOTAL_HW_VAL:
		IFNET_RLOCK_ASSERT();
		CK_LIST_FOREACH(i, &sav->accel_ifps, sav_link) {
			/* Skip interfaces that did not accept the SA. */
			if ((i->flags & (IFP_HS_HANDLED | IFP_HS_REJECTED)) !=
			    IFP_HS_HANDLED)
				continue;
			ifp1 = i->ifp;
			p = ifp1->if_ipsec_accel_m->if_sa_cnt;
			if (p == NULL)
				continue;
			/* Fresh scratch buffer for each driver query. */
			memset(&lft_s, 0, sizeof(lft_s));
			if (sahtree_trackerp != NULL)
				ipsec_sahtree_runlock(sahtree_trackerp);
			error = p(ifp1, sav, i->drv_spi, i->ifdata, &lft_s);
			if (sahtree_trackerp != NULL)
				ipsec_sahtree_rlock(sahtree_trackerp);
			if (error == 0)
				ipsec_accel_sa_lifetime_update(&lft_l, &lft_s);
		}
		break;
	}

	if (error == 0) {
		/* Without IF_SA_CNT_UPD the result replaces lft_c. */
		if ((op & IF_SA_CNT_UPD) == 0)
			memset(lft_c, 0, sizeof(*lft_c));
		ipsec_accel_sa_lifetime_update(lft_c, &lft_l);
	}

	return (error);
}
1047 
1048 static void
1049 ipsec_accel_sync_imp(void)
1050 {
1051 	taskqueue_drain_all(taskqueue_thread);
1052 }
1053 
1054 static struct mbuf *
1055 ipsec_accel_key_setaccelif_impl(struct secasvar *sav)
1056 {
1057 	struct mbuf *m, *m1;
1058 	struct ifp_handle_sav *i;
1059 	struct epoch_tracker et;
1060 
1061 	if (sav->accel_ifname != NULL)
1062 		return (key_setaccelif(sav->accel_ifname));
1063 
1064 	m = m1 = NULL;
1065 
1066 	NET_EPOCH_ENTER(et);
1067 	CK_LIST_FOREACH(i, &sav->accel_ifps, sav_link) {
1068 		if ((i->flags & (IFP_HS_HANDLED | IFP_HS_REJECTED)) ==
1069 		    IFP_HS_HANDLED) {
1070 			m1 = key_setaccelif(if_name(i->ifp));
1071 			if (m == NULL)
1072 				m = m1;
1073 			else if (m1 != NULL)
1074 				m_cat(m, m1);
1075 		}
1076 	}
1077 	NET_EPOCH_EXIT(et);
1078 	return (m);
1079 }
1080 
1081 #endif	/* IPSEC_OFFLOAD */
1082