xref: /freebsd/sys/netipsec/ipsec_output.c (revision 5ca8c28cd8c725b81781201cfdb5f9969396f934)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2002, 2003 Sam Leffler, Errno Consulting
5  * Copyright (c) 2016 Andrey V. Elsukov <ae@FreeBSD.org>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 /*
31  * IPsec output processing.
32  */
33 #include "opt_inet.h"
34 #include "opt_inet6.h"
35 #include "opt_ipsec.h"
36 #include "opt_sctp.h"
37 
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/mbuf.h>
41 #include <sys/domain.h>
42 #include <sys/protosw.h>
43 #include <sys/socket.h>
44 #include <sys/errno.h>
45 #include <sys/hhook.h>
46 #include <sys/syslog.h>
47 
48 #include <net/if.h>
49 #include <net/if_enc.h>
50 #include <net/if_var.h>
51 #include <net/vnet.h>
52 
53 #include <netinet/in.h>
54 #include <netinet/in_pcb.h>
55 #include <netinet/in_systm.h>
56 #include <netinet/ip.h>
57 #include <netinet/ip_var.h>
58 #include <netinet/in_var.h>
59 #include <netinet/ip_ecn.h>
60 #ifdef INET6
61 #include <netinet6/ip6_ecn.h>
62 #endif
63 #include <netinet/ip_icmp.h>
64 #include <netinet/tcp_var.h>
65 
66 #include <netinet/ip6.h>
67 #ifdef INET6
68 #include <netinet6/ip6_var.h>
69 #include <netinet6/scope6_var.h>
70 #endif
71 #include <netinet/in_pcb.h>
72 #ifdef INET6
73 #include <netinet/icmp6.h>
74 #endif
75 #if defined(SCTP) || defined(SCTP_SUPPORT)
76 #include <netinet/sctp_crc32.h>
77 #endif
78 
79 #include <netinet/udp.h>
80 #include <netipsec/ah.h>
81 #include <netipsec/esp.h>
82 #include <netipsec/ipsec.h>
83 #ifdef INET6
84 #include <netipsec/ipsec6.h>
85 #endif
86 #include <netipsec/ipsec_support.h>
87 #include <netipsec/ipsec_offload.h>
88 #include <netipsec/ah_var.h>
89 #include <netipsec/esp_var.h>
90 #include <netipsec/ipcomp_var.h>
91 
92 #include <netipsec/xform.h>
93 
94 #include <netipsec/key.h>
95 #include <netipsec/keydb.h>
96 #include <netipsec/key_debug.h>
97 
98 #include <machine/in_cksum.h>
99 
/*
 * Bump the output statistics counter `name' that belongs to IPsec
 * protocol `proto': ESP and AH have their own counter sets, every
 * other protocol value is accounted to IPComp.
 * `proto' is evaluated exactly once, so it may safely be an expression
 * with side effects.
 */
#define	IPSEC_OSTAT_INC(proto, name)	do {		\
	switch (proto) {				\
	case IPPROTO_ESP:				\
		ESPSTAT_INC(esps_##name);		\
		break;					\
	case IPPROTO_AH:				\
		AHSTAT_INC(ahs_##name);			\
		break;					\
	default:					\
		IPCOMPSTAT_INC(ipcomps_##name);		\
		break;					\
	}						\
} while (0)
108 
109 static int ipsec_encap(struct mbuf **mp, struct secasindex *saidx);
110 static size_t ipsec_get_pmtu(struct secasvar *sav);
111 
112 #ifdef INET
113 static struct secasvar *
114 ipsec4_allocsa(struct ifnet *ifp, struct mbuf *m, struct secpolicy *sp,
115     u_int *pidx, int *error)
116 {
117 	struct secasindex *saidx, tmpsaidx;
118 	struct ipsecrequest *isr;
119 	struct sockaddr_in *sin;
120 	struct secasvar *sav;
121 	struct ip *ip;
122 
123 	/*
124 	 * Check system global policy controls.
125 	 */
126 next:
127 	isr = sp->req[*pidx];
128 	if ((isr->saidx.proto == IPPROTO_ESP && !V_esp_enable) ||
129 	    (isr->saidx.proto == IPPROTO_AH && !V_ah_enable) ||
130 	    (isr->saidx.proto == IPPROTO_IPCOMP && !V_ipcomp_enable)) {
131 		DPRINTF(("%s: IPsec outbound packet dropped due"
132 			" to policy (check your sysctls)\n", __func__));
133 		IPSEC_OSTAT_INC(isr->saidx.proto, pdrops);
134 		*error = EHOSTUNREACH;
135 		return (NULL);
136 	}
137 	/*
138 	 * Craft SA index to search for proper SA.  Note that
139 	 * we only initialize unspecified SA peers for transport
140 	 * mode; for tunnel mode they must already be filled in.
141 	 */
142 	if (isr->saidx.mode == IPSEC_MODE_TRANSPORT) {
143 		saidx = &tmpsaidx;
144 		*saidx = isr->saidx;
145 		ip = mtod(m, struct ip *);
146 		if (saidx->src.sa.sa_len == 0) {
147 			sin = &saidx->src.sin;
148 			sin->sin_len = sizeof(*sin);
149 			sin->sin_family = AF_INET;
150 			sin->sin_port = IPSEC_PORT_ANY;
151 			sin->sin_addr = ip->ip_src;
152 		}
153 		if (saidx->dst.sa.sa_len == 0) {
154 			sin = &saidx->dst.sin;
155 			sin->sin_len = sizeof(*sin);
156 			sin->sin_family = AF_INET;
157 			sin->sin_port = IPSEC_PORT_ANY;
158 			sin->sin_addr = ip->ip_dst;
159 		}
160 	} else
161 		saidx = &sp->req[*pidx]->saidx;
162 	/*
163 	 * Lookup SA and validate it.
164 	 */
165 	sav = key_allocsa_policy(sp, saidx, error);
166 	if (sav == NULL) {
167 		IPSECSTAT_INC(ips_out_nosa);
168 		if (*error != 0)
169 			return (NULL);
170 		if (ipsec_get_reqlevel(sp, *pidx) != IPSEC_LEVEL_REQUIRE) {
171 			/*
172 			 * We have no SA and policy that doesn't require
173 			 * this IPsec transform, thus we can continue w/o
174 			 * IPsec processing, i.e. return EJUSTRETURN.
175 			 * But first check if there is some bundled transform.
176 			 */
177 			if (sp->tcount > ++(*pidx))
178 				goto next;
179 			*error = EJUSTRETURN;
180 		}
181 		return (NULL);
182 	}
183 	IPSEC_ASSERT(sav->tdb_xform != NULL, ("SA with NULL tdb_xform"));
184 	return (sav);
185 }
186 
187 /*
188  * IPsec output logic for IPv4.
189  */
190 static int
191 ipsec4_perform_request(struct ifnet *ifp, struct mbuf *m, struct secpolicy *sp,
192     struct inpcb *inp, u_int idx, u_long mtu)
193 {
194 	struct ipsec_ctx_data ctx;
195 	union sockaddr_union *dst;
196 	struct secasvar *sav;
197 	struct ip *ip;
198 	int error, hwassist, i, off;
199 	bool accel;
200 
201 	IPSEC_ASSERT(idx < sp->tcount, ("Wrong IPsec request index %d", idx));
202 
203 	/*
204 	 * We hold the reference to SP. Content of SP couldn't be changed.
205 	 * Craft secasindex and do lookup for suitable SA.
206 	 * Then do encapsulation if needed and call xform's output.
207 	 * We need to store SP in the xform callback parameters.
208 	 * In xform callback we will extract SP and it can be used to
209 	 * determine next transform. At the end of transform we can
210 	 * release reference to SP.
211 	 */
212 	sav = ipsec4_allocsa(ifp, m, sp, &idx, &error);
213 	if (sav == NULL) {
214 		if (error == EJUSTRETURN) { /* No IPsec required */
215 			(void)ipsec_accel_output(ifp, m, inp, sp, NULL,
216 			    AF_INET, mtu, &hwassist);
217 			key_freesp(&sp);
218 			return (error);
219 		}
220 		goto bad;
221 	}
222 	/*
223 	 * XXXAE: most likely ip_sum at this point is wrong.
224 	 */
225 	IPSEC_INIT_CTX(&ctx, &m, inp, sav, AF_INET, IPSEC_ENC_BEFORE);
226 	if ((error = ipsec_run_hhooks(&ctx, HHOOK_TYPE_IPSEC_OUT)) != 0)
227 		goto bad;
228 
229 	hwassist = 0;
230 	accel = ipsec_accel_output(ifp, m, inp, sp, sav, AF_INET, mtu,
231 	    &hwassist);
232 
233 	/*
234 	 * Do delayed checksums now because we send before
235 	 * this is done in the normal processing path.
236 	 */
237 	if ((m->m_pkthdr.csum_flags & CSUM_DELAY_DATA & ~hwassist) != 0) {
238 		in_delayed_cksum(m);
239 		m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
240 	}
241 #if defined(SCTP) || defined(SCTP_SUPPORT)
242 	if ((m->m_pkthdr.csum_flags & CSUM_SCTP & ~hwassist) != 0) {
243 		struct ip *ip;
244 
245 		ip = mtod(m, struct ip *);
246 		sctp_delayed_cksum(m, (uint32_t)(ip->ip_hl << 2));
247 		m->m_pkthdr.csum_flags &= ~CSUM_SCTP;
248 	}
249 #endif
250 	if (accel)
251 		return (EJUSTRETURN);
252 
253 	ip = mtod(m, struct ip *);
254 	dst = &sav->sah->saidx.dst;
255 	/* Do the appropriate encapsulation, if necessary */
256 	if (sp->req[idx]->saidx.mode == IPSEC_MODE_TUNNEL || /* Tunnel requ'd */
257 	    dst->sa.sa_family != AF_INET ||	    /* PF mismatch */
258 	    (dst->sa.sa_family == AF_INET &&	    /* Proxy */
259 	     dst->sin.sin_addr.s_addr != INADDR_ANY &&
260 	     dst->sin.sin_addr.s_addr != ip->ip_dst.s_addr)) {
261 		/* Fix IPv4 header checksum and length */
262 		ip->ip_len = htons(m->m_pkthdr.len);
263 		ip->ip_sum = 0;
264 		ip->ip_sum = in_cksum(m, ip->ip_hl << 2);
265 		error = ipsec_encap(&m, &sav->sah->saidx);
266 		if (error != 0) {
267 			DPRINTF(("%s: encapsulation for SPI 0x%08x failed "
268 			    "with error %d\n", __func__, ntohl(sav->spi),
269 			    error));
270 			/* XXXAE: IPSEC_OSTAT_INC(tunnel); */
271 			goto bad;
272 		}
273 		inp = NULL;
274 	}
275 
276 	IPSEC_INIT_CTX(&ctx, &m, inp, sav, dst->sa.sa_family, IPSEC_ENC_AFTER);
277 	if ((error = ipsec_run_hhooks(&ctx, HHOOK_TYPE_IPSEC_OUT)) != 0)
278 		goto bad;
279 
280 	/*
281 	 * Dispatch to the appropriate IPsec transform logic.  The
282 	 * packet will be returned for transmission after crypto
283 	 * processing, etc. are completed.
284 	 *
285 	 * NB: m & sav are ``passed to caller'' who's responsible for
286 	 *     reclaiming their resources.
287 	 */
288 	switch(dst->sa.sa_family) {
289 	case AF_INET:
290 		ip = mtod(m, struct ip *);
291 		i = ip->ip_hl << 2;
292 		off = offsetof(struct ip, ip_p);
293 		break;
294 #ifdef INET6
295 	case AF_INET6:
296 		i = sizeof(struct ip6_hdr);
297 		off = offsetof(struct ip6_hdr, ip6_nxt);
298 		break;
299 #endif /* INET6 */
300 	default:
301 		DPRINTF(("%s: unsupported protocol family %u\n",
302 		    __func__, dst->sa.sa_family));
303 		error = EPFNOSUPPORT;
304 		IPSEC_OSTAT_INC(sav->sah->saidx.proto, nopf);
305 		goto bad;
306 	}
307 	error = (*sav->tdb_xform->xf_output)(m, sp, sav, idx, i, off);
308 	return (error);
309 bad:
310 	IPSECSTAT_INC(ips_out_inval);
311 	if (m != NULL)
312 		m_freem(m);
313 	if (sav != NULL)
314 		key_freesav(&sav);
315 	key_freesp(&sp);
316 	return (error);
317 }
318 
319 int
320 ipsec4_process_packet(struct ifnet *ifp, struct mbuf *m, struct secpolicy *sp,
321     struct inpcb *inp, u_long mtu)
322 {
323 
324 	return (ipsec4_perform_request(ifp, m, sp, inp, 0, mtu));
325 }
326 
327 int
328 ipsec4_check_pmtu(struct ifnet *ifp, struct mbuf *m, struct secpolicy *sp,
329     int forwarding)
330 {
331 	struct secasvar *sav;
332 	struct ip *ip;
333 	size_t hlen, pmtu;
334 	uint32_t idx;
335 	int error;
336 
337 	/* Don't check PMTU if the frame won't have DF bit set. */
338 	if (!V_ip4_ipsec_dfbit)
339 		return (0);
340 	if (V_ip4_ipsec_dfbit == 1)
341 		goto setdf;
342 
343 	/* V_ip4_ipsec_dfbit > 1 - we will copy it from inner header. */
344 	ip = mtod(m, struct ip *);
345 	if (!(ip->ip_off & htons(IP_DF)))
346 		return (0);
347 
348 setdf:
349 	idx = sp->tcount - 1;
350 	sav = ipsec4_allocsa(ifp, m, sp, &idx, &error);
351 	if (sav == NULL) {
352 		key_freesp(&sp);
353 		/*
354 		 * No matching SA was found and SADB_ACQUIRE message was generated.
355 		 * Since we have matched a SP to this packet drop it silently.
356 		 */
357 		if (error == 0)
358 			error = EINPROGRESS;
359 		if (error != EJUSTRETURN)
360 			m_freem(m);
361 
362 		return (error);
363 	}
364 
365 	pmtu = ipsec_get_pmtu(sav);
366 	if (pmtu == 0) {
367 		key_freesav(&sav);
368 		return (0);
369 	}
370 
371 	hlen = ipsec_hdrsiz_internal(sp);
372 	key_freesav(&sav);
373 
374 	if (m_length(m, NULL) + hlen > pmtu) {
375 		/*
376 		 * If we're forwarding generate ICMP message here,
377 		 * so that it contains pmtu subtracted by header size.
378 		 * Set error to EINPROGRESS, in order for the frame
379 		 * to be dropped silently.
380 		 */
381 		if (forwarding) {
382 			if (pmtu > hlen)
383 				icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG,
384 				    0, pmtu - hlen);
385 			else
386 				m_freem(m);
387 
388 			key_freesp(&sp);
389 			return (EINPROGRESS); /* Pretend that we consumed it. */
390 		} else {
391 			m_freem(m);
392 			key_freesp(&sp);
393 			return (EMSGSIZE);
394 		}
395 	}
396 
397 	return (0);
398 }
399 
400 static int
401 ipsec4_common_output(struct ifnet *ifp, struct mbuf *m, struct inpcb *inp,
402     int forwarding, u_long mtu)
403 {
404 	struct secpolicy *sp;
405 	int error;
406 
407 	/* Lookup for the corresponding outbound security policy */
408 	sp = ipsec4_checkpolicy(m, inp, &error, !forwarding);
409 	if (sp == NULL) {
410 		if (error == -EINVAL) {
411 			/* Discarded by policy. */
412 			m_freem(m);
413 			return (EACCES);
414 		}
415 		return (0); /* No IPsec required. */
416 	}
417 
418 	/*
419 	 * Usually we have to have tunnel mode IPsec security policy
420 	 * when we are forwarding a packet. Otherwise we could not handle
421 	 * encrypted replies, because they are not destined for us. But
422 	 * some users are doing source address translation for forwarded
423 	 * packets, and thus, even if they are forwarded, the replies will
424 	 * return back to us.
425 	 */
426 
427 	/* NB: callee frees mbuf and releases reference to SP */
428 	error = ipsec4_check_pmtu(ifp, m, sp, forwarding);
429 	if (error != 0) {
430 		if (error == EJUSTRETURN)
431 			return (0);
432 
433 		return (error);
434 	}
435 
436 	error = ipsec4_process_packet(ifp, m, sp, inp, mtu);
437 	if (error == EJUSTRETURN) {
438 		/*
439 		 * We had a SP with a level of 'use' and no SA. We
440 		 * will just continue to process the packet without
441 		 * IPsec processing and return without error.
442 		 */
443 		return (0);
444 	}
445 	if (error == 0)
446 		return (EINPROGRESS); /* consumed by IPsec */
447 	return (error);
448 }
449 
450 /*
451  * IPSEC_OUTPUT() method implementation for IPv4.
452  * 0 - no IPsec handling needed
453  * other values - mbuf consumed by IPsec.
454  */
455 int
456 ipsec4_output(struct ifnet *ifp, struct mbuf *m, struct inpcb *inp, u_long mtu)
457 {
458 
459 	/*
460 	 * If the packet is resubmitted to ip_output (e.g. after
461 	 * AH, ESP, etc. processing), there will be a tag to bypass
462 	 * the lookup and related policy checking.
463 	 */
464 	if (m_tag_find(m, PACKET_TAG_IPSEC_OUT_DONE, NULL) != NULL)
465 		return (0);
466 
467 	return (ipsec4_common_output(ifp, m, inp, 0, mtu));
468 }
469 
470 /*
471  * IPSEC_FORWARD() method implementation for IPv4.
472  * 0 - no IPsec handling needed
473  * other values - mbuf consumed by IPsec.
474  */
475 int
476 ipsec4_forward(struct mbuf *m)
477 {
478 
479 	/*
480 	 * Check if this packet has an active inbound SP and needs to be
481 	 * dropped instead of forwarded.
482 	 */
483 	if (ipsec4_in_reject(m, NULL) != 0) {
484 		m_freem(m);
485 		return (EACCES);
486 	}
487 	return (ipsec4_common_output(NULL /* XXXKIB */, m, NULL, 1, 0));
488 }
489 #endif
490 
491 #ifdef INET6
492 static int
493 in6_sa_equal_addrwithscope(const struct sockaddr_in6 *sa,
494     const struct in6_addr *ia)
495 {
496 	struct in6_addr ia2;
497 
498 	if (IN6_IS_SCOPE_LINKLOCAL(&sa->sin6_addr)) {
499 		memcpy(&ia2, &sa->sin6_addr, sizeof(ia2));
500 		ia2.s6_addr16[1] = htons(sa->sin6_scope_id);
501 		return (IN6_ARE_ADDR_EQUAL(ia, &ia2));
502 	}
503 	return (IN6_ARE_ADDR_EQUAL(&sa->sin6_addr, ia));
504 }
505 
506 static struct secasvar *
507 ipsec6_allocsa(struct ifnet *ifp, struct mbuf *m, struct secpolicy *sp,
508     u_int *pidx, int *error)
509 {
510 	struct secasindex *saidx, tmpsaidx;
511 	struct ipsecrequest *isr;
512 	struct sockaddr_in6 *sin6;
513 	struct secasvar *sav;
514 	struct ip6_hdr *ip6;
515 
516 	/*
517 	 * Check system global policy controls.
518 	 */
519 next:
520 	isr = sp->req[*pidx];
521 	if ((isr->saidx.proto == IPPROTO_ESP && !V_esp_enable) ||
522 	    (isr->saidx.proto == IPPROTO_AH && !V_ah_enable) ||
523 	    (isr->saidx.proto == IPPROTO_IPCOMP && !V_ipcomp_enable)) {
524 		DPRINTF(("%s: IPsec outbound packet dropped due"
525 			" to policy (check your sysctls)\n", __func__));
526 		IPSEC_OSTAT_INC(isr->saidx.proto, pdrops);
527 		*error = EHOSTUNREACH;
528 		return (NULL);
529 	}
530 	/*
531 	 * Craft SA index to search for proper SA.  Note that
532 	 * we only fillin unspecified SA peers for transport
533 	 * mode; for tunnel mode they must already be filled in.
534 	 */
535 	if (isr->saidx.mode == IPSEC_MODE_TRANSPORT) {
536 		saidx = &tmpsaidx;
537 		*saidx = isr->saidx;
538 		ip6 = mtod(m, struct ip6_hdr *);
539 		if (saidx->src.sin6.sin6_len == 0) {
540 			sin6 = (struct sockaddr_in6 *)&saidx->src;
541 			sin6->sin6_len = sizeof(*sin6);
542 			sin6->sin6_family = AF_INET6;
543 			sin6->sin6_port = IPSEC_PORT_ANY;
544 			sin6->sin6_addr = ip6->ip6_src;
545 			if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) {
546 				/* fix scope id for comparing SPD */
547 				sin6->sin6_addr.s6_addr16[1] = 0;
548 				sin6->sin6_scope_id =
549 				    ntohs(ip6->ip6_src.s6_addr16[1]);
550 			}
551 		}
552 		if (saidx->dst.sin6.sin6_len == 0) {
553 			sin6 = (struct sockaddr_in6 *)&saidx->dst;
554 			sin6->sin6_len = sizeof(*sin6);
555 			sin6->sin6_family = AF_INET6;
556 			sin6->sin6_port = IPSEC_PORT_ANY;
557 			sin6->sin6_addr = ip6->ip6_dst;
558 			if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst)) {
559 				/* fix scope id for comparing SPD */
560 				sin6->sin6_addr.s6_addr16[1] = 0;
561 				sin6->sin6_scope_id =
562 				    ntohs(ip6->ip6_dst.s6_addr16[1]);
563 			}
564 		}
565 	} else
566 		saidx = &sp->req[*pidx]->saidx;
567 	/*
568 	 * Lookup SA and validate it.
569 	 */
570 	sav = key_allocsa_policy(sp, saidx, error);
571 	if (sav == NULL) {
572 		IPSEC6STAT_INC(ips_out_nosa);
573 		if (*error != 0)
574 			return (NULL);
575 		if (ipsec_get_reqlevel(sp, *pidx) != IPSEC_LEVEL_REQUIRE) {
576 			/*
577 			 * We have no SA and policy that doesn't require
578 			 * this IPsec transform, thus we can continue w/o
579 			 * IPsec processing, i.e. return EJUSTRETURN.
580 			 * But first check if there is some bundled transform.
581 			 */
582 			if (sp->tcount > ++(*pidx))
583 				goto next;
584 			*error = EJUSTRETURN;
585 		}
586 		return (NULL);
587 	}
588 	IPSEC_ASSERT(sav->tdb_xform != NULL, ("SA with NULL tdb_xform"));
589 	return (sav);
590 }
591 
592 /*
593  * IPsec output logic for IPv6.
594  */
595 static int
596 ipsec6_perform_request(struct ifnet *ifp, struct mbuf *m, struct secpolicy *sp,
597     struct inpcb *inp, u_int idx, u_long mtu)
598 {
599 	struct ipsec_ctx_data ctx;
600 	union sockaddr_union *dst;
601 	struct secasvar *sav;
602 	struct ip6_hdr *ip6;
603 	int error, hwassist, i, off;
604 	bool accel;
605 
606 	IPSEC_ASSERT(idx < sp->tcount, ("Wrong IPsec request index %d", idx));
607 
608 	sav = ipsec6_allocsa(ifp, m, sp, &idx, &error);
609 	if (sav == NULL) {
610 		if (error == EJUSTRETURN) { /* No IPsec required */
611 			(void)ipsec_accel_output(ifp, m, inp, sp, NULL,
612 			    AF_INET6, mtu, &hwassist);
613 			key_freesp(&sp);
614 			return (error);
615 		}
616 		goto bad;
617 	}
618 
619 	/* Fix IP length in case if it is not set yet. */
620 	ip6 = mtod(m, struct ip6_hdr *);
621 	ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(*ip6));
622 
623 	IPSEC_INIT_CTX(&ctx, &m, inp, sav, AF_INET6, IPSEC_ENC_BEFORE);
624 	if ((error = ipsec_run_hhooks(&ctx, HHOOK_TYPE_IPSEC_OUT)) != 0)
625 		goto bad;
626 
627 	hwassist = 0;
628 	accel = ipsec_accel_output(ifp, m, inp, sp, sav, AF_INET6, mtu,
629 	    &hwassist);
630 
631 	/*
632 	 * Do delayed checksums now because we send before
633 	 * this is done in the normal processing path.
634 	 */
635 	if ((m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6 & ~hwassist) != 0) {
636 		in6_delayed_cksum(m, m->m_pkthdr.len -
637 		    sizeof(struct ip6_hdr), sizeof(struct ip6_hdr));
638 		m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6;
639 	}
640 #if defined(SCTP) || defined(SCTP_SUPPORT)
641 	if ((m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6 & ~hwassist) != 0) {
642 		sctp_delayed_cksum(m, sizeof(struct ip6_hdr));
643 		m->m_pkthdr.csum_flags &= ~CSUM_SCTP_IPV6;
644 	}
645 #endif
646 	if (accel)
647 		return (EJUSTRETURN);
648 
649 	ip6 = mtod(m, struct ip6_hdr *); /* pfil can change mbuf */
650 	dst = &sav->sah->saidx.dst;
651 
652 	/* Do the appropriate encapsulation, if necessary */
653 	if (sp->req[idx]->saidx.mode == IPSEC_MODE_TUNNEL || /* Tunnel requ'd */
654 	    dst->sa.sa_family != AF_INET6 ||        /* PF mismatch */
655 	    ((dst->sa.sa_family == AF_INET6) &&
656 	     (!IN6_IS_ADDR_UNSPECIFIED(&dst->sin6.sin6_addr)) &&
657 	     (!in6_sa_equal_addrwithscope(&dst->sin6, &ip6->ip6_dst)))) {
658 		if (m->m_pkthdr.len - sizeof(*ip6) > IPV6_MAXPACKET) {
659 			/* No jumbogram support. */
660 			error = ENXIO;   /*XXX*/
661 			goto bad;
662 		}
663 		error = ipsec_encap(&m, &sav->sah->saidx);
664 		if (error != 0) {
665 			DPRINTF(("%s: encapsulation for SPI 0x%08x failed "
666 			    "with error %d\n", __func__, ntohl(sav->spi),
667 			    error));
668 			/* XXXAE: IPSEC_OSTAT_INC(tunnel); */
669 			goto bad;
670 		}
671 		inp = NULL;
672 	}
673 
674 	IPSEC_INIT_CTX(&ctx, &m, inp, sav, dst->sa.sa_family, IPSEC_ENC_AFTER);
675 	if ((error = ipsec_run_hhooks(&ctx, HHOOK_TYPE_IPSEC_OUT)) != 0)
676 		goto bad;
677 
678 	switch(dst->sa.sa_family) {
679 #ifdef INET
680 	case AF_INET:
681 		{
682 		struct ip *ip;
683 		ip = mtod(m, struct ip *);
684 		i = ip->ip_hl << 2;
685 		off = offsetof(struct ip, ip_p);
686 		}
687 		break;
688 #endif /* AF_INET */
689 	case AF_INET6:
690 		i = sizeof(struct ip6_hdr);
691 		off = offsetof(struct ip6_hdr, ip6_nxt);
692 		break;
693 	default:
694 		DPRINTF(("%s: unsupported protocol family %u\n",
695 				 __func__, dst->sa.sa_family));
696 		error = EPFNOSUPPORT;
697 		IPSEC_OSTAT_INC(sav->sah->saidx.proto, nopf);
698 		goto bad;
699 	}
700 	error = (*sav->tdb_xform->xf_output)(m, sp, sav, idx, i, off);
701 	return (error);
702 bad:
703 	IPSEC6STAT_INC(ips_out_inval);
704 	if (m != NULL)
705 		m_freem(m);
706 	if (sav != NULL)
707 		key_freesav(&sav);
708 	key_freesp(&sp);
709 	return (error);
710 }
711 
712 int
713 ipsec6_process_packet(struct ifnet *ifp, struct mbuf *m, struct secpolicy *sp,
714     struct inpcb *inp, u_long mtu)
715 {
716 
717 	return (ipsec6_perform_request(ifp, m, sp, inp, 0, mtu));
718 }
719 
720 /*
721  * IPv6 implementation is based on IPv4 implementation.
722  */
723 int
724 ipsec6_check_pmtu(struct ifnet *ifp, struct mbuf *m, struct secpolicy *sp,
725     int forwarding)
726 {
727 	struct secasvar *sav;
728 	size_t hlen, pmtu;
729 	uint32_t idx;
730 	int error;
731 
732 	/*
733 	 * According to RFC8200 L3 fragmentation is supposed to be done only on
734 	 * locally generated packets. During L3 forwarding packets that are too
735 	 * big are always supposed to be dropped, with an ICMPv6 packet being
736 	 * sent back.
737 	 */
738 	if (!forwarding)
739 		return (0);
740 
741 	idx = sp->tcount - 1;
742 	sav = ipsec6_allocsa(ifp, m, sp, &idx, &error);
743 	if (sav == NULL) {
744 		key_freesp(&sp);
745 		/*
746 		 * No matching SA was found and SADB_ACQUIRE message was generated.
747 		 * Since we have matched a SP to this packet drop it silently.
748 		 */
749 		if (error == 0)
750 			error = EINPROGRESS;
751 		if (error != EJUSTRETURN)
752 			m_freem(m);
753 
754 		return (error);
755 	}
756 
757 	pmtu = ipsec_get_pmtu(sav);
758 	if (pmtu == 0) {
759 		key_freesav(&sav);
760 		return (0);
761 	}
762 
763 	hlen = ipsec_hdrsiz_internal(sp);
764 	key_freesav(&sav);
765 
766 	if (m_length(m, NULL) + hlen > pmtu) {
767 		/*
768 		 * If we're forwarding generate ICMPv6 message here,
769 		 * so that it contains pmtu subtracted by header size.
770 		 * Set error to EINPROGRESS, in order for the frame
771 		 * to be dropped silently.
772 		 */
773 		if (forwarding) {
774 			if (pmtu > hlen)
775 				icmp6_error(m, ICMP6_PACKET_TOO_BIG, 0, pmtu - hlen);
776 			else
777 				m_freem(m);
778 
779 			key_freesp(&sp);
780 			return (EINPROGRESS); /* Pretend that we consumed it. */
781 		}
782 	}
783 
784 	return (0);
785 }
786 
787 static int
788 ipsec6_common_output(struct ifnet *ifp, struct mbuf *m, struct inpcb *inp,
789     int forwarding, u_long mtu)
790 {
791 	struct secpolicy *sp;
792 	int error;
793 
794 	/* Lookup for the corresponding outbound security policy */
795 	sp = ipsec6_checkpolicy(m, inp, &error, !forwarding);
796 	if (sp == NULL) {
797 		if (error == -EINVAL) {
798 			/* Discarded by policy. */
799 			m_freem(m);
800 			return (EACCES);
801 		}
802 		return (0); /* No IPsec required. */
803 	}
804 
805 	error = ipsec6_check_pmtu(ifp, m, sp, forwarding);
806 	if (error != 0) {
807 		if (error == EJUSTRETURN)
808 			return (0);
809 
810 		return (error);
811 	}
812 
813 	/* NB: callee frees mbuf and releases reference to SP */
814 	error = ipsec6_process_packet(ifp, m, sp, inp, mtu);
815 	if (error == EJUSTRETURN) {
816 		/*
817 		 * We had a SP with a level of 'use' and no SA. We
818 		 * will just continue to process the packet without
819 		 * IPsec processing and return without error.
820 		 */
821 		return (0);
822 	}
823 	if (error == 0)
824 		return (EINPROGRESS); /* consumed by IPsec */
825 	return (error);
826 }
827 
828 /*
829  * IPSEC_OUTPUT() method implementation for IPv6.
830  * 0 - no IPsec handling needed
831  * other values - mbuf consumed by IPsec.
832  */
833 int
834 ipsec6_output(struct ifnet *ifp, struct mbuf *m, struct inpcb *inp, u_long mtu)
835 {
836 
837 	/*
838 	 * If the packet is resubmitted to ip_output (e.g. after
839 	 * AH, ESP, etc. processing), there will be a tag to bypass
840 	 * the lookup and related policy checking.
841 	 */
842 	if (m_tag_find(m, PACKET_TAG_IPSEC_OUT_DONE, NULL) != NULL)
843 		return (0);
844 
845 	return (ipsec6_common_output(ifp, m, inp, 0, mtu));
846 }
847 
848 /*
849  * IPSEC_FORWARD() method implementation for IPv6.
850  * 0 - no IPsec handling needed
851  * other values - mbuf consumed by IPsec.
852  */
853 int
854 ipsec6_forward(struct mbuf *m)
855 {
856 
857 	/*
858 	 * Check if this packet has an active inbound SP and needs to be
859 	 * dropped instead of forwarded.
860 	 */
861 	if (ipsec6_in_reject(m, NULL) != 0) {
862 		m_freem(m);
863 		return (EACCES);
864 	}
865 	return (ipsec6_common_output(NULL /* XXXKIB */, m, NULL, 1, 0));
866 }
867 #endif /* INET6 */
868 
869 int
870 ipsec_process_done(struct mbuf *m, struct secpolicy *sp, struct secasvar *sav,
871     u_int idx)
872 {
873 	struct epoch_tracker et;
874 	struct xform_history *xh;
875 	struct secasindex *saidx;
876 	struct m_tag *mtag;
877 	int error;
878 
879 	if (sav->state >= SADB_SASTATE_DEAD) {
880 		error = ESRCH;
881 		goto bad;
882 	}
883 	saidx = &sav->sah->saidx;
884 	switch (saidx->dst.sa.sa_family) {
885 #ifdef INET
886 	case AF_INET:
887 		/* Fix the header length, for AH processing. */
888 		mtod(m, struct ip *)->ip_len = htons(m->m_pkthdr.len);
889 		break;
890 #endif /* INET */
891 #ifdef INET6
892 	case AF_INET6:
893 		/* Fix the header length, for AH processing. */
894 		if (m->m_pkthdr.len < sizeof (struct ip6_hdr)) {
895 			error = ENXIO;
896 			goto bad;
897 		}
898 		if (m->m_pkthdr.len - sizeof (struct ip6_hdr) > IPV6_MAXPACKET) {
899 			/* No jumbogram support. */
900 			error = ENXIO;	/*?*/
901 			goto bad;
902 		}
903 		mtod(m, struct ip6_hdr *)->ip6_plen =
904 			htons(m->m_pkthdr.len - sizeof(struct ip6_hdr));
905 		break;
906 #endif /* INET6 */
907 	default:
908 		DPRINTF(("%s: unknown protocol family %u\n", __func__,
909 		    saidx->dst.sa.sa_family));
910 		error = ENXIO;
911 		goto bad;
912 	}
913 
914 	/*
915 	 * Add a record of what we've done to the packet.
916 	 */
917 	mtag = m_tag_get(PACKET_TAG_IPSEC_OUT_DONE, sizeof(*xh), M_NOWAIT);
918 	if (mtag == NULL) {
919 		DPRINTF(("%s: could not get packet tag\n", __func__));
920 		error = ENOMEM;
921 		goto bad;
922 	}
923 
924 	xh = (struct xform_history *)(mtag + 1);
925 	xh->dst = saidx->dst;
926 	xh->proto = saidx->proto;
927 	xh->mode = saidx->mode;
928 	xh->spi = sav->spi;
929 	m_tag_prepend(m, mtag);
930 
931 	key_sa_recordxfer(sav, m);		/* record data transfer */
932 
933 	/*
934 	 * If there's another (bundled) SA to apply, do so.
935 	 * Note that this puts a burden on the kernel stack size.
936 	 * If this is a problem we'll need to introduce a queue
937 	 * to set the packet on so we can unwind the stack before
938 	 * doing further processing.
939 	 */
940 	if (++idx < sp->tcount) {
941 		switch (saidx->dst.sa.sa_family) {
942 #ifdef INET
943 		case AF_INET:
944 			key_freesav(&sav);
945 			IPSECSTAT_INC(ips_out_bundlesa);
946 			return (ipsec4_perform_request(NULL, m, sp, NULL,
947 			    idx, 0));
948 			/* NOTREACHED */
949 #endif
950 #ifdef INET6
951 		case AF_INET6:
952 			key_freesav(&sav);
953 			IPSEC6STAT_INC(ips_out_bundlesa);
954 			return (ipsec6_perform_request(NULL, m, sp, NULL,
955 			    idx, 0));
956 			/* NOTREACHED */
957 #endif /* INET6 */
958 		default:
959 			DPRINTF(("%s: unknown protocol family %u\n", __func__,
960 			    saidx->dst.sa.sa_family));
961 			error = EPFNOSUPPORT;
962 			goto bad;
963 		}
964 	}
965 
966 	key_freesp(&sp), sp = NULL;	/* Release reference to SP */
967 #if defined(INET) || defined(INET6)
968 	/*
969 	 * Do UDP encapsulation if SA requires it.
970 	 */
971 	if (sav->natt != NULL) {
972 		error = udp_ipsec_output(m, sav);
973 		if (error != 0)
974 			goto bad;
975 	}
976 #endif /* INET || INET6 */
977 	/*
978 	 * We're done with IPsec processing, transmit the packet using the
979 	 * appropriate network protocol (IP or IPv6).
980 	 */
981 	NET_EPOCH_ENTER(et);
982 	switch (saidx->dst.sa.sa_family) {
983 #ifdef INET
984 	case AF_INET:
985 		key_freesav(&sav);
986 		error = ip_output(m, NULL, NULL, IP_RAWOUTPUT, NULL, NULL);
987 		break;
988 #endif /* INET */
989 #ifdef INET6
990 	case AF_INET6:
991 		key_freesav(&sav);
992 		error = ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL);
993 		break;
994 #endif /* INET6 */
995 	default:
996 		panic("ipsec_process_done");
997 	}
998 	NET_EPOCH_EXIT(et);
999 	return (error);
1000 bad:
1001 	m_freem(m);
1002 	key_freesav(&sav);
1003 	if (sp != NULL)
1004 		key_freesp(&sp);
1005 	return (error);
1006 }
1007 
1008 /*
1009  * ipsec_prepend() is optimized version of M_PREPEND().
1010  * ipsec_encap() is called by IPsec output routine for tunnel mode SA.
1011  * It is expected that after IP encapsulation some IPsec transform will
1012  * be performed. Each IPsec transform inserts its variable length header
1013  * just after outer IP header using m_makespace(). If given mbuf has not
1014  * enough free space at the beginning, we allocate new mbuf and reserve
1015  * some space at the beginning and at the end.
1016  * This helps avoid allocating of new mbuf and data copying in m_makespace(),
1017  * we place outer header in the middle of mbuf's data with reserved leading
1018  * and trailing space:
1019  *	[ LEADINGSPACE ][ Outer IP header ][ TRAILINGSPACE ]
1020  * LEADINGSPACE will be used to add ethernet header, TRAILINGSPACE will
1021  * be used to inject AH/ESP/IPCOMP header.
1022  */
1023 #define	IPSEC_TRAILINGSPACE	(sizeof(struct udphdr) +/* NAT-T */	\
1024     max(sizeof(struct newesp) + EALG_MAX_BLOCK_LEN,	/* ESP + IV */	\
1025 	sizeof(struct newah) + HASH_MAX_LEN		/* AH + ICV */))
1026 static struct mbuf *
1027 ipsec_prepend(struct mbuf *m, int len, int how)
1028 {
1029 	struct mbuf *n;
1030 
1031 	M_ASSERTPKTHDR(m);
1032 	IPSEC_ASSERT(len < MHLEN, ("wrong length"));
1033 	if (M_LEADINGSPACE(m) >= len) {
1034 		/* No need to allocate new mbuf. */
1035 		m->m_data -= len;
1036 		m->m_len += len;
1037 		m->m_pkthdr.len += len;
1038 		return (m);
1039 	}
1040 	n = m_gethdr(how, m->m_type);
1041 	if (n == NULL) {
1042 		m_freem(m);
1043 		return (NULL);
1044 	}
1045 	m_move_pkthdr(n, m);
1046 	n->m_next = m;
1047 	if (len + IPSEC_TRAILINGSPACE < M_SIZE(n))
1048 		m_align(n, len + IPSEC_TRAILINGSPACE);
1049 	n->m_len = len;
1050 	n->m_pkthdr.len += len;
1051 	return (n);
1052 }
1053 
1054 static size_t
1055 ipsec_get_pmtu(struct secasvar *sav)
1056 {
1057 	union sockaddr_union *dst;
1058 	struct in_conninfo inc;
1059 	size_t pmtu;
1060 
1061 	dst = &sav->sah->saidx.dst;
1062 	memset(&inc, 0, sizeof(inc));
1063 
1064 	switch (dst->sa.sa_family) {
1065 #ifdef INET
1066 	case AF_INET:
1067 		inc.inc_faddr = satosin(&dst->sa)->sin_addr;
1068 		break;
1069 #endif
1070 #ifdef INET6
1071 	case AF_INET6:
1072 		inc.inc6_faddr = satosin6(&dst->sa)->sin6_addr;
1073 		inc.inc_flags |= INC_ISIPV6;
1074 		break;
1075 #endif
1076 	default:
1077 		return (0);
1078 	}
1079 
1080 	pmtu = tcp_hc_getmtu(&inc);
1081 	if (pmtu != 0)
1082 		return (pmtu);
1083 
1084 	/* No entry in hostcache. Assume that PMTU is equal to link's MTU */
1085 	switch (dst->sa.sa_family) {
1086 #ifdef INET
1087 	case AF_INET:
1088 		pmtu = tcp_maxmtu(&inc, NULL);
1089 		break;
1090 #endif
1091 #ifdef INET6
1092 	case AF_INET6:
1093 		pmtu = tcp_maxmtu6(&inc, NULL);
1094 		break;
1095 #endif
1096 	default:
1097 		return (0);
1098 	}
1099 	if (pmtu == 0)
1100 		return (0);
1101 
1102 	tcp_hc_updatemtu(&inc, pmtu);
1103 
1104 	return (pmtu);
1105 }
1106 
1107 static int
1108 ipsec_encap(struct mbuf **mp, struct secasindex *saidx)
1109 {
1110 #ifdef INET6
1111 	struct ip6_hdr *ip6;
1112 #endif
1113 	struct ip *ip;
1114 #ifdef INET
1115 	int setdf;
1116 #endif
1117 	uint8_t itos, proto;
1118 
1119 	ip = mtod(*mp, struct ip *);
1120 	switch (ip->ip_v) {
1121 #ifdef INET
1122 	case IPVERSION:
1123 		proto = IPPROTO_IPIP;
1124 		/*
1125 		 * Collect IP_DF state from the inner header
1126 		 * and honor system-wide control of how to handle it.
1127 		 */
1128 		switch (V_ip4_ipsec_dfbit) {
1129 		case 0:	/* clear in outer header */
1130 		case 1:	/* set in outer header */
1131 			setdf = V_ip4_ipsec_dfbit;
1132 			break;
1133 		default:/* propagate to outer header */
1134 			setdf = (ip->ip_off & htons(IP_DF)) != 0;
1135 		}
1136 		itos = ip->ip_tos;
1137 		break;
1138 #endif
1139 #ifdef INET6
1140 	case (IPV6_VERSION >> 4):
1141 		proto = IPPROTO_IPV6;
1142 		ip6 = mtod(*mp, struct ip6_hdr *);
1143 		itos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
1144 		/* scoped address handling */
1145 		in6_clearscope(&ip6->ip6_src);
1146 		in6_clearscope(&ip6->ip6_dst);
1147 		break;
1148 #endif
1149 	default:
1150 		return (EAFNOSUPPORT);
1151 	}
1152 	switch (saidx->dst.sa.sa_family) {
1153 #ifdef INET
1154 	case AF_INET:
1155 		if (saidx->src.sa.sa_family != AF_INET ||
1156 		    saidx->src.sin.sin_addr.s_addr == INADDR_ANY ||
1157 		    saidx->dst.sin.sin_addr.s_addr == INADDR_ANY)
1158 			return (EINVAL);
1159 		*mp = ipsec_prepend(*mp, sizeof(struct ip), M_NOWAIT);
1160 		if (*mp == NULL)
1161 			return (ENOBUFS);
1162 		ip = mtod(*mp, struct ip *);
1163 		ip->ip_v = IPVERSION;
1164 		ip->ip_hl = sizeof(struct ip) >> 2;
1165 		ip->ip_p = proto;
1166 		ip->ip_len = htons((*mp)->m_pkthdr.len);
1167 		ip->ip_ttl = V_ip_defttl;
1168 		ip->ip_sum = 0;
1169 		ip->ip_off = setdf ? htons(IP_DF): 0;
1170 		ip->ip_src = saidx->src.sin.sin_addr;
1171 		ip->ip_dst = saidx->dst.sin.sin_addr;
1172 		ip_ecn_ingress(V_ip4_ipsec_ecn, &ip->ip_tos, &itos);
1173 		ip_fillid(ip);
1174 		break;
1175 #endif /* INET */
1176 #ifdef INET6
1177 	case AF_INET6:
1178 		if (saidx->src.sa.sa_family != AF_INET6 ||
1179 		    IN6_IS_ADDR_UNSPECIFIED(&saidx->src.sin6.sin6_addr) ||
1180 		    IN6_IS_ADDR_UNSPECIFIED(&saidx->dst.sin6.sin6_addr))
1181 			return (EINVAL);
1182 		*mp = ipsec_prepend(*mp, sizeof(struct ip6_hdr), M_NOWAIT);
1183 		if (*mp == NULL)
1184 			return (ENOBUFS);
1185 		ip6 = mtod(*mp, struct ip6_hdr *);
1186 		ip6->ip6_flow = 0;
1187 		ip6->ip6_vfc = IPV6_VERSION;
1188 		ip6->ip6_hlim = V_ip6_defhlim;
1189 		ip6->ip6_nxt = proto;
1190 		ip6->ip6_dst = saidx->dst.sin6.sin6_addr;
1191 		/* For link-local address embed scope zone id */
1192 		if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst))
1193 			ip6->ip6_dst.s6_addr16[1] =
1194 			    htons(saidx->dst.sin6.sin6_scope_id & 0xffff);
1195 		ip6->ip6_src = saidx->src.sin6.sin6_addr;
1196 		if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src))
1197 			ip6->ip6_src.s6_addr16[1] =
1198 			    htons(saidx->src.sin6.sin6_scope_id & 0xffff);
1199 		ip6->ip6_plen = htons((*mp)->m_pkthdr.len - sizeof(*ip6));
1200 		ip_ecn_ingress(V_ip6_ipsec_ecn, &proto, &itos);
1201 		ip6->ip6_flow |= htonl((uint32_t)proto << 20);
1202 		break;
1203 #endif /* INET6 */
1204 	default:
1205 		return (EAFNOSUPPORT);
1206 	}
1207 	(*mp)->m_flags &= ~(M_BCAST | M_MCAST);
1208 	return (0);
1209 }
1210