xref: /freebsd/sys/netipsec/ipsec.c (revision 3e5645b78f476816ca3b5acc28b29bbafbb9c444)
1 /*	$FreeBSD$	*/
2 /*	$KAME: ipsec.c,v 1.103 2001/05/24 07:14:18 sakane Exp $	*/
3 
4 /*-
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * IPsec controller part.
35  */
36 
37 #include "opt_inet.h"
38 #include "opt_inet6.h"
39 #include "opt_ipsec.h"
40 
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/malloc.h>
44 #include <sys/mbuf.h>
45 #include <sys/domain.h>
46 #include <sys/priv.h>
47 #include <sys/protosw.h>
48 #include <sys/socket.h>
49 #include <sys/socketvar.h>
50 #include <sys/errno.h>
51 #include <sys/time.h>
52 #include <sys/kernel.h>
53 #include <sys/syslog.h>
54 #include <sys/sysctl.h>
55 #include <sys/proc.h>
56 
57 #include <net/if.h>
58 #include <net/if_var.h>
59 #include <net/vnet.h>
60 
61 #include <netinet/in.h>
62 #include <netinet/in_systm.h>
63 #include <netinet/ip.h>
64 #include <netinet/ip_var.h>
65 #include <netinet/in_var.h>
66 #include <netinet/udp.h>
67 #include <netinet/udp_var.h>
68 #include <netinet/tcp.h>
69 #include <netinet/udp.h>
70 
71 #include <netinet/ip6.h>
72 #ifdef INET6
73 #include <netinet6/ip6_var.h>
74 #endif
75 #include <netinet/in_pcb.h>
76 #ifdef INET6
77 #include <netinet/icmp6.h>
78 #endif
79 
80 #include <sys/types.h>
81 #include <netipsec/ipsec.h>
82 #ifdef INET6
83 #include <netipsec/ipsec6.h>
84 #endif
85 #include <netipsec/ah_var.h>
86 #include <netipsec/esp_var.h>
87 #include <netipsec/ipcomp.h>		/*XXX*/
88 #include <netipsec/ipcomp_var.h>
89 
90 #include <netipsec/key.h>
91 #include <netipsec/keydb.h>
92 #include <netipsec/key_debug.h>
93 
94 #include <netipsec/xform.h>
95 
96 #include <machine/in_cksum.h>
97 
98 #include <opencrypto/cryptodev.h>
99 
/*
 * Debug switch, exported as the "debug" sysctl by the SYSCTL_INT()
 * definitions in this file.  It defaults to on only in kernels built with
 * IPSEC_DEBUG.
 */
#ifdef IPSEC_DEBUG
VNET_DEFINE(int, ipsec_debug) = 1;
#else
VNET_DEFINE(int, ipsec_debug) = 0;
#endif

/* NB: name changed so netstat doesn't use it. */
VNET_PCPUSTAT_DEFINE(struct ipsecstat, ipsec4stat);
VNET_PCPUSTAT_SYSINIT(ipsec4stat);

#ifdef VIMAGE
VNET_PCPUSTAT_SYSUNINIT(ipsec4stat);
#endif /* VIMAGE */

/*
 * IPv4 tunables.  Most of them are exported read-write through the
 * net.inet.ipsec SYSCTL_INT() definitions in this file.
 */
VNET_DEFINE(int, ip4_ah_offsetmask) = 0;	/* maybe IP_DF? */
/* DF bit on encap. 0: clear 1: set 2: copy */
VNET_DEFINE(int, ip4_ipsec_dfbit) = 0;
/* Default levels for ESP/AH in transport ("trans") and tunnel ("net") mode. */
VNET_DEFINE(int, ip4_esp_trans_deflev) = IPSEC_LEVEL_USE;
VNET_DEFINE(int, ip4_esp_net_deflev) = IPSEC_LEVEL_USE;
VNET_DEFINE(int, ip4_ah_trans_deflev) = IPSEC_LEVEL_USE;
VNET_DEFINE(int, ip4_ah_net_deflev) = IPSEC_LEVEL_USE;
/* ECN ignore(-1)/forbidden(0)/allowed(1) */
VNET_DEFINE(int, ip4_ipsec_ecn) = 0;
VNET_DEFINE(int, ip4_esp_randpad) = -1;
124 
/*
 * System default policy.  Only its 'policy' field is exported (as the
 * def_policy sysctl); key_allocsp_default() validates that field before
 * handing the policy out.
 */
static VNET_DEFINE(struct secpolicy, def_policy);
#define	V_def_policy	VNET(def_policy)
/*
 * Crypto support requirements:
 *
 *  1	require hardware support
 * -1	require software support
 *  0	take anything
 *
 * NOTE(review): the values listed above do not match the initializer below,
 * which ORs together the CRYPTOCAP_F_HARDWARE and CRYPTOCAP_F_SOFTWARE
 * flags; presumably the variable is a flag mask rather than a tri-state --
 * confirm against opencrypto and update this table.
 */
VNET_DEFINE(int, crypto_support) = CRYPTOCAP_F_HARDWARE | CRYPTOCAP_F_SOFTWARE;

/* Kernel feature strings announcing IPsec (and NAT-T when compiled in). */
FEATURE(ipsec, "Internet Protocol Security (IPsec)");
#ifdef IPSEC_NAT_T
FEATURE(ipsec_natt, "UDP Encapsulation of IPsec ESP Packets ('NAT-T')");
#endif
140 
141 SYSCTL_DECL(_net_inet_ipsec);
142 
143 /* net.inet.ipsec */
144 SYSCTL_INT(_net_inet_ipsec, IPSECCTL_DEF_POLICY, def_policy,
145 	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(def_policy).policy, 0,
146 	"IPsec default policy.");
147 SYSCTL_INT(_net_inet_ipsec, IPSECCTL_DEF_ESP_TRANSLEV, esp_trans_deflev,
148 	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip4_esp_trans_deflev), 0,
149 	"Default ESP transport mode level");
150 SYSCTL_INT(_net_inet_ipsec, IPSECCTL_DEF_ESP_NETLEV, esp_net_deflev,
151 	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip4_esp_net_deflev), 0,
152 	"Default ESP tunnel mode level.");
153 SYSCTL_INT(_net_inet_ipsec, IPSECCTL_DEF_AH_TRANSLEV, ah_trans_deflev,
154 	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip4_ah_trans_deflev), 0,
155 	"AH transfer mode default level.");
156 SYSCTL_INT(_net_inet_ipsec, IPSECCTL_DEF_AH_NETLEV, ah_net_deflev,
157 	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip4_ah_net_deflev), 0,
158 	"AH tunnel mode default level.");
159 SYSCTL_INT(_net_inet_ipsec, IPSECCTL_AH_CLEARTOS, ah_cleartos,
160 	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ah_cleartos), 0,
161 	"If set clear type-of-service field when doing AH computation.");
162 SYSCTL_INT(_net_inet_ipsec, IPSECCTL_AH_OFFSETMASK, ah_offsetmask,
163 	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip4_ah_offsetmask), 0,
164 	"If not set clear offset field mask when doing AH computation.");
165 SYSCTL_INT(_net_inet_ipsec, IPSECCTL_DFBIT, dfbit,
166 	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip4_ipsec_dfbit), 0,
167 	"Do not fragment bit on encap.");
168 SYSCTL_INT(_net_inet_ipsec, IPSECCTL_ECN, ecn,
169 	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip4_ipsec_ecn), 0,
170 	"Explicit Congestion Notification handling.");
171 SYSCTL_INT(_net_inet_ipsec, IPSECCTL_DEBUG, debug,
172 	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ipsec_debug), 0,
173 	"Enable IPsec debugging output when set.");
174 SYSCTL_INT(_net_inet_ipsec, OID_AUTO, crypto_support,
175 	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(crypto_support), 0,
176 	"Crypto driver selection.");
177 SYSCTL_VNET_PCPUSTAT(_net_inet_ipsec, OID_AUTO, ipsecstats, struct ipsecstat,
178     ipsec4stat, "IPsec IPv4 statistics.");
179 
#ifdef REGRESSION
/*
 * When set to 1, IPsec will send packets with the same sequence number.
 * This makes it possible to verify that the other side properly detects
 * replay attacks.
 */
VNET_DEFINE(int, ipsec_replay) = 0;
SYSCTL_INT(_net_inet_ipsec, OID_AUTO, test_replay,
	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ipsec_replay), 0,
	"Emulate replay attack");
/*
 * When set to 1, IPsec will send packets with a corrupted HMAC.
 * This makes it possible to verify that the other side properly detects
 * modified packets.
 */
VNET_DEFINE(int, ipsec_integrity) = 0;
SYSCTL_INT(_net_inet_ipsec, OID_AUTO, test_integrity,
	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ipsec_integrity), 0,
	"Emulate man-in-the-middle attack");
#endif
198 
#ifdef INET6
VNET_PCPUSTAT_DEFINE(struct ipsecstat, ipsec6stat);
VNET_PCPUSTAT_SYSINIT(ipsec6stat);

#ifdef VIMAGE
VNET_PCPUSTAT_SYSUNINIT(ipsec6stat);
#endif /* VIMAGE */

/* IPv6 default levels for ESP/AH in transport ("trans") and tunnel ("net") mode. */
VNET_DEFINE(int, ip6_esp_trans_deflev) = IPSEC_LEVEL_USE;
VNET_DEFINE(int, ip6_esp_net_deflev) = IPSEC_LEVEL_USE;
VNET_DEFINE(int, ip6_ah_trans_deflev) = IPSEC_LEVEL_USE;
VNET_DEFINE(int, ip6_ah_net_deflev) = IPSEC_LEVEL_USE;
VNET_DEFINE(int, ip6_ipsec_ecn) = 0;	/* ECN ignore(-1)/forbidden(0)/allowed(1) */

SYSCTL_DECL(_net_inet6_ipsec6);

/*
 * net.inet6.ipsec6
 *
 * The def_policy and debug entries point at the same variables
 * (def_policy.policy and ipsec_debug) as the net.inet.ipsec entries of the
 * same name; the remaining entries have IPv6-specific storage.
 */
SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_DEF_POLICY, def_policy,
	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(def_policy).policy, 0,
	"IPsec default policy.");
SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_DEF_ESP_TRANSLEV, esp_trans_deflev,
	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_esp_trans_deflev), 0,
	"Default ESP transport mode level.");
SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_DEF_ESP_NETLEV, esp_net_deflev,
	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_esp_net_deflev), 0,
	"Default ESP tunnel mode level.");
SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_DEF_AH_TRANSLEV, ah_trans_deflev,
	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_ah_trans_deflev), 0,
	"AH transport mode default level.");
SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_DEF_AH_NETLEV, ah_net_deflev,
	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_ah_net_deflev), 0,
	"AH tunnel mode default level.");
SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_ECN, ecn,
	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_ipsec_ecn), 0,
	"Explicit Congestion Notification handling.");
SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_DEBUG, debug,
	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ipsec_debug), 0,
	"Enable IPsec debugging output when set.");
SYSCTL_VNET_PCPUSTAT(_net_inet6_ipsec6, IPSECCTL_STATS, ipsecstats,
    struct ipsecstat, ipsec6stat, "IPsec IPv6 statistics.");
#endif /* INET6 */
240 
/* Forward declarations of the file-local helpers. */
static int ipsec_in_reject(struct secpolicy *, struct mbuf *);
/* Selector (spidx) construction from packet contents. */
static int ipsec_setspidx_inpcb(struct mbuf *, struct inpcb *);
static int ipsec_setspidx(struct mbuf *, struct secpolicyindex *, int);
static void ipsec4_get_ulp(struct mbuf *m, struct secpolicyindex *, int);
static int ipsec4_setspidx_ipaddr(struct mbuf *, struct secpolicyindex *);
#ifdef INET6
static void ipsec6_get_ulp(struct mbuf *m, struct secpolicyindex *, int);
static int ipsec6_setspidx_ipaddr(struct mbuf *, struct secpolicyindex *);
#endif
/* Per-PCB policy management. */
static void ipsec_delpcbpolicy(struct inpcbpolicy *);
static struct secpolicy *ipsec_deepcopy_policy(struct secpolicy *src);
static void vshiftl(unsigned char *, int, int);

/* malloc(9) type for the struct inpcbpolicy allocated by ipsec_init_policy(). */
MALLOC_DEFINE(M_IPSEC_INPCB, "inpcbpolicy", "inpcb-resident ipsec policy");
255 
256 /*
257  * Return a held reference to the default SP.
258  */
259 static struct secpolicy *
260 key_allocsp_default(const char* where, int tag)
261 {
262 	struct secpolicy *sp;
263 
264 	KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
265 		printf("DP key_allocsp_default from %s:%u\n", where, tag));
266 
267 	sp = &V_def_policy;
268 	if (sp->policy != IPSEC_POLICY_DISCARD &&
269 	    sp->policy != IPSEC_POLICY_NONE) {
270 		ipseclog((LOG_INFO, "fixed system default policy: %d->%d\n",
271 		    sp->policy, IPSEC_POLICY_NONE));
272 		sp->policy = IPSEC_POLICY_NONE;
273 	}
274 	key_addref(sp);
275 
276 	KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
277 		printf("DP key_allocsp_default returns SP:%p (%u)\n",
278 			sp, sp->refcnt));
279 	return (sp);
280 }
281 #define	KEY_ALLOCSP_DEFAULT() \
282 	key_allocsp_default(__FILE__, __LINE__)
283 
284 /*
285  * For OUTBOUND packet having a socket. Searching SPD for packet,
286  * and return a pointer to SP.
287  * OUT:	NULL:	no apropreate SP found, the following value is set to error.
288  *		0	: bypass
289  *		EACCES	: discard packet.
290  *		ENOENT	: ipsec_acquire() in progress, maybe.
291  *		others	: error occured.
292  *	others:	a pointer to SP
293  *
294  * NOTE: IPv6 mapped adddress concern is implemented here.
295  */
296 struct secpolicy *
297 ipsec_getpolicy(struct tdb_ident *tdbi, u_int dir)
298 {
299 	struct secpolicy *sp;
300 
301 	IPSEC_ASSERT(tdbi != NULL, ("null tdbi"));
302 	IPSEC_ASSERT(dir == IPSEC_DIR_INBOUND || dir == IPSEC_DIR_OUTBOUND,
303 		("invalid direction %u", dir));
304 
305 	sp = KEY_ALLOCSP2(tdbi->spi, &tdbi->dst, tdbi->proto, dir);
306 	if (sp == NULL)			/*XXX????*/
307 		sp = KEY_ALLOCSP_DEFAULT();
308 	IPSEC_ASSERT(sp != NULL, ("null SP"));
309 	return (sp);
310 }
311 
312 /*
313  * For OUTBOUND packet having a socket. Searching SPD for packet,
314  * and return a pointer to SP.
315  * OUT:	NULL:	no apropreate SP found, the following value is set to error.
316  *		0	: bypass
317  *		EACCES	: discard packet.
318  *		ENOENT	: ipsec_acquire() in progress, maybe.
319  *		others	: error occured.
320  *	others:	a pointer to SP
321  *
322  * NOTE: IPv6 mapped adddress concern is implemented here.
323  */
324 static struct secpolicy *
325 ipsec_getpolicybysock(struct mbuf *m, u_int dir, struct inpcb *inp, int *error)
326 {
327 	struct inpcbpolicy *pcbsp;
328 	struct secpolicy *currsp = NULL;	/* Policy on socket. */
329 	struct secpolicy *sp;
330 
331 	IPSEC_ASSERT(m != NULL, ("null mbuf"));
332 	IPSEC_ASSERT(inp != NULL, ("null inpcb"));
333 	IPSEC_ASSERT(error != NULL, ("null error"));
334 	IPSEC_ASSERT(dir == IPSEC_DIR_INBOUND || dir == IPSEC_DIR_OUTBOUND,
335 		("invalid direction %u", dir));
336 
337 	/* Set spidx in pcb. */
338 	*error = ipsec_setspidx_inpcb(m, inp);
339 	if (*error)
340 		return (NULL);
341 
342 	pcbsp = inp->inp_sp;
343 	IPSEC_ASSERT(pcbsp != NULL, ("null pcbsp"));
344 	switch (dir) {
345 	case IPSEC_DIR_INBOUND:
346 		currsp = pcbsp->sp_in;
347 		break;
348 	case IPSEC_DIR_OUTBOUND:
349 		currsp = pcbsp->sp_out;
350 		break;
351 	}
352 	IPSEC_ASSERT(currsp != NULL, ("null currsp"));
353 
354 	if (pcbsp->priv) {			/* When privilieged socket. */
355 		switch (currsp->policy) {
356 		case IPSEC_POLICY_BYPASS:
357 		case IPSEC_POLICY_IPSEC:
358 			key_addref(currsp);
359 			sp = currsp;
360 			break;
361 
362 		case IPSEC_POLICY_ENTRUST:
363 			/* Look for a policy in SPD. */
364 			sp = KEY_ALLOCSP(&currsp->spidx, dir);
365 			if (sp == NULL)		/* No SP found. */
366 				sp = KEY_ALLOCSP_DEFAULT();
367 			break;
368 
369 		default:
370 			ipseclog((LOG_ERR, "%s: Invalid policy for PCB %d\n",
371 				__func__, currsp->policy));
372 			*error = EINVAL;
373 			return (NULL);
374 		}
375 	} else {				/* Unpriv, SPD has policy. */
376 		sp = KEY_ALLOCSP(&currsp->spidx, dir);
377 		if (sp == NULL) {		/* No SP found. */
378 			switch (currsp->policy) {
379 			case IPSEC_POLICY_BYPASS:
380 				ipseclog((LOG_ERR, "%s: Illegal policy for "
381 					"non-priviliged defined %d\n",
382 					__func__, currsp->policy));
383 				*error = EINVAL;
384 				return (NULL);
385 
386 			case IPSEC_POLICY_ENTRUST:
387 				sp = KEY_ALLOCSP_DEFAULT();
388 				break;
389 
390 			case IPSEC_POLICY_IPSEC:
391 				key_addref(currsp);
392 				sp = currsp;
393 				break;
394 
395 			default:
396 				ipseclog((LOG_ERR, "%s: Invalid policy for "
397 					"PCB %d\n", __func__, currsp->policy));
398 				*error = EINVAL;
399 				return (NULL);
400 			}
401 		}
402 	}
403 	IPSEC_ASSERT(sp != NULL,
404 		("null SP (priv %u policy %u", pcbsp->priv, currsp->policy));
405 	KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
406 		printf("DP %s (priv %u policy %u) allocate SP:%p (refcnt %u)\n",
407 			__func__, pcbsp->priv, currsp->policy, sp, sp->refcnt));
408 	return (sp);
409 }
410 
411 /*
412  * For FORWADING packet or OUTBOUND without a socket. Searching SPD for packet,
413  * and return a pointer to SP.
414  * OUT:	positive: a pointer to the entry for security policy leaf matched.
415  *	NULL:	no apropreate SP found, the following value is set to error.
416  *		0	: bypass
417  *		EACCES	: discard packet.
418  *		ENOENT	: ipsec_acquire() in progress, maybe.
419  *		others	: error occured.
420  */
421 struct secpolicy *
422 ipsec_getpolicybyaddr(struct mbuf *m, u_int dir, int *error)
423 {
424 	struct secpolicyindex spidx;
425 	struct secpolicy *sp;
426 
427 	IPSEC_ASSERT(m != NULL, ("null mbuf"));
428 	IPSEC_ASSERT(error != NULL, ("null error"));
429 	IPSEC_ASSERT(dir == IPSEC_DIR_INBOUND || dir == IPSEC_DIR_OUTBOUND,
430 		("invalid direction %u", dir));
431 
432 	sp = NULL;
433 	*error = 0;
434 	if (key_havesp(dir)) {
435 		/* Make an index to look for a policy. */
436 		*error = ipsec_setspidx(m, &spidx, 0);
437 		if (*error != 0) {
438 			DPRINTF(("%s: setpidx failed, dir %u\n",
439 				__func__, dir));
440 			return (NULL);
441 		}
442 		spidx.dir = dir;
443 		sp = KEY_ALLOCSP(&spidx, dir);
444 	}
445 	if (sp == NULL)			/* No SP found, use system default. */
446 		sp = KEY_ALLOCSP_DEFAULT();
447 	IPSEC_ASSERT(sp != NULL, ("null SP"));
448 	return (sp);
449 }
450 
451 struct secpolicy *
452 ipsec4_checkpolicy(struct mbuf *m, u_int dir, int *error, struct inpcb *inp)
453 {
454 	struct secpolicy *sp;
455 
456 	*error = 0;
457 	if (inp == NULL)
458 		sp = ipsec_getpolicybyaddr(m, dir, error);
459 	else
460 		sp = ipsec_getpolicybysock(m, dir, inp, error);
461 	if (sp == NULL) {
462 		IPSEC_ASSERT(*error != 0, ("getpolicy failed w/o error"));
463 		IPSECSTAT_INC(ips_out_inval);
464 		return (NULL);
465 	}
466 	IPSEC_ASSERT(*error == 0, ("sp w/ error set to %u", *error));
467 	switch (sp->policy) {
468 	case IPSEC_POLICY_ENTRUST:
469 	default:
470 		printf("%s: invalid policy %u\n", __func__, sp->policy);
471 		/* FALLTHROUGH */
472 	case IPSEC_POLICY_DISCARD:
473 		IPSECSTAT_INC(ips_out_polvio);
474 		*error = -EINVAL;	/* Packet is discarded by caller. */
475 		break;
476 	case IPSEC_POLICY_BYPASS:
477 	case IPSEC_POLICY_NONE:
478 		KEY_FREESP(&sp);
479 		sp = NULL;		/* NB: force NULL result. */
480 		break;
481 	case IPSEC_POLICY_IPSEC:
482 		if (sp->req == NULL)	/* Acquire a SA. */
483 			*error = key_spdacquire(sp);
484 		break;
485 	}
486 	if (*error != 0) {
487 		KEY_FREESP(&sp);
488 		sp = NULL;
489 	}
490 	return (sp);
491 }
492 
493 static int
494 ipsec_setspidx_inpcb(struct mbuf *m, struct inpcb *inp)
495 {
496 	int error;
497 
498 	IPSEC_ASSERT(inp != NULL, ("null inp"));
499 	IPSEC_ASSERT(inp->inp_sp != NULL, ("null inp_sp"));
500 	IPSEC_ASSERT(inp->inp_sp->sp_out != NULL && inp->inp_sp->sp_in != NULL,
501 		("null sp_in || sp_out"));
502 
503 	error = ipsec_setspidx(m, &inp->inp_sp->sp_in->spidx, 1);
504 	if (error == 0) {
505 		inp->inp_sp->sp_in->spidx.dir = IPSEC_DIR_INBOUND;
506 		inp->inp_sp->sp_out->spidx = inp->inp_sp->sp_in->spidx;
507 		inp->inp_sp->sp_out->spidx.dir = IPSEC_DIR_OUTBOUND;
508 	} else {
509 		bzero(&inp->inp_sp->sp_in->spidx,
510 			sizeof (inp->inp_sp->sp_in->spidx));
511 		bzero(&inp->inp_sp->sp_out->spidx,
512 			sizeof (inp->inp_sp->sp_in->spidx));
513 	}
514 	return (error);
515 }
516 
517 /*
518  * Configure security policy index (src/dst/proto/sport/dport)
519  * by looking at the content of mbuf.
520  * The caller is responsible for error recovery (like clearing up spidx).
521  */
522 static int
523 ipsec_setspidx(struct mbuf *m, struct secpolicyindex *spidx, int needport)
524 {
525 	struct ip *ip = NULL;
526 	struct ip ipbuf;
527 	u_int v;
528 	struct mbuf *n;
529 	int len;
530 	int error;
531 
532 	IPSEC_ASSERT(m != NULL, ("null mbuf"));
533 
534 	/*
535 	 * Validate m->m_pkthdr.len.  We see incorrect length if we
536 	 * mistakenly call this function with inconsistent mbuf chain
537 	 * (like 4.4BSD tcp/udp processing).  XXX Should we panic here?
538 	 */
539 	len = 0;
540 	for (n = m; n; n = n->m_next)
541 		len += n->m_len;
542 	if (m->m_pkthdr.len != len) {
543 		KEYDEBUG(KEYDEBUG_IPSEC_DUMP,
544 			printf("%s: pkthdr len(%d) mismatch (%d), ignored.\n",
545 				__func__, len, m->m_pkthdr.len));
546 		return (EINVAL);
547 	}
548 
549 	if (m->m_pkthdr.len < sizeof(struct ip)) {
550 		KEYDEBUG(KEYDEBUG_IPSEC_DUMP,
551 			printf("%s: pkthdr len(%d) too small (v4), ignored.\n",
552 			    __func__, m->m_pkthdr.len));
553 		return (EINVAL);
554 	}
555 
556 	if (m->m_len >= sizeof(*ip))
557 		ip = mtod(m, struct ip *);
558 	else {
559 		m_copydata(m, 0, sizeof(ipbuf), (caddr_t)&ipbuf);
560 		ip = &ipbuf;
561 	}
562 	v = ip->ip_v;
563 	switch (v) {
564 	case 4:
565 		error = ipsec4_setspidx_ipaddr(m, spidx);
566 		if (error)
567 			return (error);
568 		ipsec4_get_ulp(m, spidx, needport);
569 		return (0);
570 #ifdef INET6
571 	case 6:
572 		if (m->m_pkthdr.len < sizeof(struct ip6_hdr)) {
573 			KEYDEBUG(KEYDEBUG_IPSEC_DUMP,
574 				printf("%s: pkthdr len(%d) too small (v6), "
575 				"ignored\n", __func__, m->m_pkthdr.len));
576 			return (EINVAL);
577 		}
578 		error = ipsec6_setspidx_ipaddr(m, spidx);
579 		if (error)
580 			return (error);
581 		ipsec6_get_ulp(m, spidx, needport);
582 		return (0);
583 #endif
584 	default:
585 		KEYDEBUG(KEYDEBUG_IPSEC_DUMP,
586 			printf("%s: " "unknown IP version %u, ignored.\n",
587 				__func__, v));
588 		return (EINVAL);
589 	}
590 }
591 
592 static void
593 ipsec4_get_ulp(struct mbuf *m, struct secpolicyindex *spidx, int needport)
594 {
595 	u_int8_t nxt;
596 	int off;
597 
598 	/* Sanity check. */
599 	IPSEC_ASSERT(m != NULL, ("null mbuf"));
600 	IPSEC_ASSERT(m->m_pkthdr.len >= sizeof(struct ip),("packet too short"));
601 
602 	if (m->m_len >= sizeof (struct ip)) {
603 		struct ip *ip = mtod(m, struct ip *);
604 		if (ip->ip_off & htons(IP_MF | IP_OFFMASK))
605 			goto done;
606 		off = ip->ip_hl << 2;
607 		nxt = ip->ip_p;
608 	} else {
609 		struct ip ih;
610 
611 		m_copydata(m, 0, sizeof (struct ip), (caddr_t) &ih);
612 		if (ih.ip_off & htons(IP_MF | IP_OFFMASK))
613 			goto done;
614 		off = ih.ip_hl << 2;
615 		nxt = ih.ip_p;
616 	}
617 
618 	while (off < m->m_pkthdr.len) {
619 		struct ip6_ext ip6e;
620 		struct tcphdr th;
621 		struct udphdr uh;
622 
623 		switch (nxt) {
624 		case IPPROTO_TCP:
625 			spidx->ul_proto = nxt;
626 			if (!needport)
627 				goto done_proto;
628 			if (off + sizeof(struct tcphdr) > m->m_pkthdr.len)
629 				goto done;
630 			m_copydata(m, off, sizeof (th), (caddr_t) &th);
631 			spidx->src.sin.sin_port = th.th_sport;
632 			spidx->dst.sin.sin_port = th.th_dport;
633 			return;
634 		case IPPROTO_UDP:
635 			spidx->ul_proto = nxt;
636 			if (!needport)
637 				goto done_proto;
638 			if (off + sizeof(struct udphdr) > m->m_pkthdr.len)
639 				goto done;
640 			m_copydata(m, off, sizeof (uh), (caddr_t) &uh);
641 			spidx->src.sin.sin_port = uh.uh_sport;
642 			spidx->dst.sin.sin_port = uh.uh_dport;
643 			return;
644 		case IPPROTO_AH:
645 			if (off + sizeof(ip6e) > m->m_pkthdr.len)
646 				goto done;
647 			/* XXX Sigh, this works but is totally bogus. */
648 			m_copydata(m, off, sizeof(ip6e), (caddr_t) &ip6e);
649 			off += (ip6e.ip6e_len + 2) << 2;
650 			nxt = ip6e.ip6e_nxt;
651 			break;
652 		case IPPROTO_ICMP:
653 		default:
654 			/* XXX Intermediate headers??? */
655 			spidx->ul_proto = nxt;
656 			goto done_proto;
657 		}
658 	}
659 done:
660 	spidx->ul_proto = IPSEC_ULPROTO_ANY;
661 done_proto:
662 	spidx->src.sin.sin_port = IPSEC_PORT_ANY;
663 	spidx->dst.sin.sin_port = IPSEC_PORT_ANY;
664 }
665 
666 /* Assumes that m is sane. */
667 static int
668 ipsec4_setspidx_ipaddr(struct mbuf *m, struct secpolicyindex *spidx)
669 {
670 	static const struct sockaddr_in template = {
671 		sizeof (struct sockaddr_in),
672 		AF_INET,
673 		0, { 0 }, { 0, 0, 0, 0, 0, 0, 0, 0 }
674 	};
675 
676 	spidx->src.sin = template;
677 	spidx->dst.sin = template;
678 
679 	if (m->m_len < sizeof (struct ip)) {
680 		m_copydata(m, offsetof(struct ip, ip_src),
681 			   sizeof (struct  in_addr),
682 			   (caddr_t) &spidx->src.sin.sin_addr);
683 		m_copydata(m, offsetof(struct ip, ip_dst),
684 			   sizeof (struct  in_addr),
685 			   (caddr_t) &spidx->dst.sin.sin_addr);
686 	} else {
687 		struct ip *ip = mtod(m, struct ip *);
688 		spidx->src.sin.sin_addr = ip->ip_src;
689 		spidx->dst.sin.sin_addr = ip->ip_dst;
690 	}
691 
692 	spidx->prefs = sizeof(struct in_addr) << 3;
693 	spidx->prefd = sizeof(struct in_addr) << 3;
694 
695 	return (0);
696 }
697 
698 #ifdef INET6
699 static void
700 ipsec6_get_ulp(struct mbuf *m, struct secpolicyindex *spidx, int needport)
701 {
702 	int off, nxt;
703 	struct tcphdr th;
704 	struct udphdr uh;
705 	struct icmp6_hdr ih;
706 
707 	/* Sanity check. */
708 	if (m == NULL)
709 		panic("%s: NULL pointer was passed.\n", __func__);
710 
711 	KEYDEBUG(KEYDEBUG_IPSEC_DUMP,
712 		printf("%s:\n", __func__); kdebug_mbuf(m));
713 
714 	/* Set default. */
715 	spidx->ul_proto = IPSEC_ULPROTO_ANY;
716 	((struct sockaddr_in6 *)&spidx->src)->sin6_port = IPSEC_PORT_ANY;
717 	((struct sockaddr_in6 *)&spidx->dst)->sin6_port = IPSEC_PORT_ANY;
718 
719 	nxt = -1;
720 	off = ip6_lasthdr(m, 0, IPPROTO_IPV6, &nxt);
721 	if (off < 0 || m->m_pkthdr.len < off)
722 		return;
723 
724 	switch (nxt) {
725 	case IPPROTO_TCP:
726 		spidx->ul_proto = nxt;
727 		if (!needport)
728 			break;
729 		if (off + sizeof(struct tcphdr) > m->m_pkthdr.len)
730 			break;
731 		m_copydata(m, off, sizeof(th), (caddr_t)&th);
732 		((struct sockaddr_in6 *)&spidx->src)->sin6_port = th.th_sport;
733 		((struct sockaddr_in6 *)&spidx->dst)->sin6_port = th.th_dport;
734 		break;
735 	case IPPROTO_UDP:
736 		spidx->ul_proto = nxt;
737 		if (!needport)
738 			break;
739 		if (off + sizeof(struct udphdr) > m->m_pkthdr.len)
740 			break;
741 		m_copydata(m, off, sizeof(uh), (caddr_t)&uh);
742 		((struct sockaddr_in6 *)&spidx->src)->sin6_port = uh.uh_sport;
743 		((struct sockaddr_in6 *)&spidx->dst)->sin6_port = uh.uh_dport;
744 		break;
745 	case IPPROTO_ICMPV6:
746 		spidx->ul_proto = nxt;
747 		if (off + sizeof(struct icmp6_hdr) > m->m_pkthdr.len)
748 			break;
749 		m_copydata(m, off, sizeof(ih), (caddr_t)&ih);
750 		((struct sockaddr_in6 *)&spidx->src)->sin6_port =
751 		    htons((uint16_t)ih.icmp6_type);
752 		((struct sockaddr_in6 *)&spidx->dst)->sin6_port =
753 		    htons((uint16_t)ih.icmp6_code);
754 		break;
755 	default:
756 		/* XXX Intermediate headers??? */
757 		spidx->ul_proto = nxt;
758 		break;
759 	}
760 }
761 
762 /* Assumes that m is sane. */
763 static int
764 ipsec6_setspidx_ipaddr(struct mbuf *m, struct secpolicyindex *spidx)
765 {
766 	struct ip6_hdr *ip6 = NULL;
767 	struct ip6_hdr ip6buf;
768 	struct sockaddr_in6 *sin6;
769 
770 	if (m->m_len >= sizeof(*ip6))
771 		ip6 = mtod(m, struct ip6_hdr *);
772 	else {
773 		m_copydata(m, 0, sizeof(ip6buf), (caddr_t)&ip6buf);
774 		ip6 = &ip6buf;
775 	}
776 
777 	sin6 = (struct sockaddr_in6 *)&spidx->src;
778 	bzero(sin6, sizeof(*sin6));
779 	sin6->sin6_family = AF_INET6;
780 	sin6->sin6_len = sizeof(struct sockaddr_in6);
781 	bcopy(&ip6->ip6_src, &sin6->sin6_addr, sizeof(ip6->ip6_src));
782 	if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) {
783 		sin6->sin6_addr.s6_addr16[1] = 0;
784 		sin6->sin6_scope_id = ntohs(ip6->ip6_src.s6_addr16[1]);
785 	}
786 	spidx->prefs = sizeof(struct in6_addr) << 3;
787 
788 	sin6 = (struct sockaddr_in6 *)&spidx->dst;
789 	bzero(sin6, sizeof(*sin6));
790 	sin6->sin6_family = AF_INET6;
791 	sin6->sin6_len = sizeof(struct sockaddr_in6);
792 	bcopy(&ip6->ip6_dst, &sin6->sin6_addr, sizeof(ip6->ip6_dst));
793 	if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst)) {
794 		sin6->sin6_addr.s6_addr16[1] = 0;
795 		sin6->sin6_scope_id = ntohs(ip6->ip6_dst.s6_addr16[1]);
796 	}
797 	spidx->prefd = sizeof(struct in6_addr) << 3;
798 
799 	return (0);
800 }
801 #endif
802 
803 static void
804 ipsec_delpcbpolicy(struct inpcbpolicy *p)
805 {
806 
807 	free(p, M_IPSEC_INPCB);
808 }
809 
810 /* Initialize policy in PCB. */
811 int
812 ipsec_init_policy(struct socket *so, struct inpcbpolicy **pcb_sp)
813 {
814 	struct inpcbpolicy *new;
815 
816 	/* Sanity check. */
817 	if (so == NULL || pcb_sp == NULL)
818 		panic("%s: NULL pointer was passed.\n", __func__);
819 
820 	new = (struct inpcbpolicy *) malloc(sizeof(struct inpcbpolicy),
821 					    M_IPSEC_INPCB, M_NOWAIT|M_ZERO);
822 	if (new == NULL) {
823 		ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__));
824 		return (ENOBUFS);
825 	}
826 
827 	new->priv = IPSEC_IS_PRIVILEGED_SO(so);
828 
829 	if ((new->sp_in = KEY_NEWSP()) == NULL) {
830 		ipsec_delpcbpolicy(new);
831 		return (ENOBUFS);
832 	}
833 	new->sp_in->policy = IPSEC_POLICY_ENTRUST;
834 	if ((new->sp_out = KEY_NEWSP()) == NULL) {
835 		KEY_FREESP(&new->sp_in);
836 		ipsec_delpcbpolicy(new);
837 		return (ENOBUFS);
838 	}
839 	new->sp_out->policy = IPSEC_POLICY_ENTRUST;
840 	*pcb_sp = new;
841 
842 	return (0);
843 }
844 
845 /* Copy old IPsec policy into new. */
846 int
847 ipsec_copy_policy(struct inpcbpolicy *old, struct inpcbpolicy *new)
848 {
849 	struct secpolicy *sp;
850 
851 	sp = ipsec_deepcopy_policy(old->sp_in);
852 	if (sp) {
853 		KEY_FREESP(&new->sp_in);
854 		new->sp_in = sp;
855 	} else
856 		return (ENOBUFS);
857 
858 	sp = ipsec_deepcopy_policy(old->sp_out);
859 	if (sp) {
860 		KEY_FREESP(&new->sp_out);
861 		new->sp_out = sp;
862 	} else
863 		return (ENOBUFS);
864 
865 	new->priv = old->priv;
866 
867 	return (0);
868 }
869 
870 struct ipsecrequest *
871 ipsec_newisr(void)
872 {
873 	struct ipsecrequest *p;
874 
875 	p = malloc(sizeof(struct ipsecrequest), M_IPSEC_SR, M_NOWAIT|M_ZERO);
876 	if (p != NULL)
877 		IPSECREQUEST_LOCK_INIT(p);
878 	return (p);
879 }
880 
881 void
882 ipsec_delisr(struct ipsecrequest *p)
883 {
884 
885 	IPSECREQUEST_LOCK_DESTROY(p);
886 	free(p, M_IPSEC_SR);
887 }
888 
889 /* Deep-copy a policy in PCB. */
890 static struct secpolicy *
891 ipsec_deepcopy_policy(struct secpolicy *src)
892 {
893 	struct ipsecrequest *newchain = NULL;
894 	struct ipsecrequest *p;
895 	struct ipsecrequest **q;
896 	struct ipsecrequest *r;
897 	struct secpolicy *dst;
898 
899 	if (src == NULL)
900 		return (NULL);
901 	dst = KEY_NEWSP();
902 	if (dst == NULL)
903 		return (NULL);
904 
905 	/*
906 	 * Deep-copy IPsec request chain.  This is required since struct
907 	 * ipsecrequest is not reference counted.
908 	 */
909 	q = &newchain;
910 	for (p = src->req; p; p = p->next) {
911 		*q = ipsec_newisr();
912 		if (*q == NULL)
913 			goto fail;
914 		(*q)->saidx.proto = p->saidx.proto;
915 		(*q)->saidx.mode = p->saidx.mode;
916 		(*q)->level = p->level;
917 		(*q)->saidx.reqid = p->saidx.reqid;
918 
919 		bcopy(&p->saidx.src, &(*q)->saidx.src, sizeof((*q)->saidx.src));
920 		bcopy(&p->saidx.dst, &(*q)->saidx.dst, sizeof((*q)->saidx.dst));
921 
922 		(*q)->sp = dst;
923 
924 		q = &((*q)->next);
925 	}
926 
927 	dst->req = newchain;
928 	dst->policy = src->policy;
929 	/* Do not touch the refcnt fields. */
930 
931 	return (dst);
932 
933 fail:
934 	for (p = newchain; p; p = r) {
935 		r = p->next;
936 		ipsec_delisr(p);
937 		p = NULL;
938 	}
939 	return (NULL);
940 }
941 
942 /* Set policy and IPsec request if present. */
943 static int
944 ipsec_set_policy_internal(struct secpolicy **pcb_sp, int optname,
945     caddr_t request, size_t len, struct ucred *cred)
946 {
947 	struct sadb_x_policy *xpl;
948 	struct secpolicy *newsp = NULL;
949 	int error;
950 
951 	/* Sanity check. */
952 	if (pcb_sp == NULL || *pcb_sp == NULL || request == NULL)
953 		return (EINVAL);
954 	if (len < sizeof(*xpl))
955 		return (EINVAL);
956 	xpl = (struct sadb_x_policy *)request;
957 
958 	KEYDEBUG(KEYDEBUG_IPSEC_DUMP,
959 		printf("%s: passed policy\n", __func__);
960 		kdebug_sadb_x_policy((struct sadb_ext *)xpl));
961 
962 	/* Check policy type. */
963 	/* ipsec_set_policy_internal() accepts IPSEC, ENTRUST and BYPASS. */
964 	if (xpl->sadb_x_policy_type == IPSEC_POLICY_DISCARD
965 	 || xpl->sadb_x_policy_type == IPSEC_POLICY_NONE)
966 		return (EINVAL);
967 
968 	/* Check privileged socket. */
969 	if (cred != NULL && xpl->sadb_x_policy_type == IPSEC_POLICY_BYPASS) {
970 		error = priv_check_cred(cred, PRIV_NETINET_IPSEC, 0);
971 		if (error)
972 			return (EACCES);
973 	}
974 
975 	/* Allocating new SP entry. */
976 	if ((newsp = key_msg2sp(xpl, len, &error)) == NULL)
977 		return (error);
978 
979 	/* Clear old SP and set new SP. */
980 	KEY_FREESP(pcb_sp);
981 	*pcb_sp = newsp;
982 	KEYDEBUG(KEYDEBUG_IPSEC_DUMP,
983 		printf("%s: new policy\n", __func__);
984 		kdebug_secpolicy(newsp));
985 
986 	return (0);
987 }
988 
989 int
990 ipsec_set_policy(struct inpcb *inp, int optname, caddr_t request,
991     size_t len, struct ucred *cred)
992 {
993 	struct sadb_x_policy *xpl;
994 	struct secpolicy **pcb_sp;
995 
996 	/* Sanity check. */
997 	if (inp == NULL || request == NULL)
998 		return (EINVAL);
999 	if (len < sizeof(*xpl))
1000 		return (EINVAL);
1001 	xpl = (struct sadb_x_policy *)request;
1002 
1003 	/* Select direction. */
1004 	switch (xpl->sadb_x_policy_dir) {
1005 	case IPSEC_DIR_INBOUND:
1006 		pcb_sp = &inp->inp_sp->sp_in;
1007 		break;
1008 	case IPSEC_DIR_OUTBOUND:
1009 		pcb_sp = &inp->inp_sp->sp_out;
1010 		break;
1011 	default:
1012 		ipseclog((LOG_ERR, "%s: invalid direction=%u\n", __func__,
1013 			xpl->sadb_x_policy_dir));
1014 		return (EINVAL);
1015 	}
1016 
1017 	return (ipsec_set_policy_internal(pcb_sp, optname, request, len, cred));
1018 }
1019 
1020 int
1021 ipsec_get_policy(struct inpcb *inp, caddr_t request, size_t len,
1022     struct mbuf **mp)
1023 {
1024 	struct sadb_x_policy *xpl;
1025 	struct secpolicy *pcb_sp;
1026 
1027 	/* Sanity check. */
1028 	if (inp == NULL || request == NULL || mp == NULL)
1029 		return (EINVAL);
1030 	IPSEC_ASSERT(inp->inp_sp != NULL, ("null inp_sp"));
1031 	if (len < sizeof(*xpl))
1032 		return (EINVAL);
1033 	xpl = (struct sadb_x_policy *)request;
1034 
1035 	/* Select direction. */
1036 	switch (xpl->sadb_x_policy_dir) {
1037 	case IPSEC_DIR_INBOUND:
1038 		pcb_sp = inp->inp_sp->sp_in;
1039 		break;
1040 	case IPSEC_DIR_OUTBOUND:
1041 		pcb_sp = inp->inp_sp->sp_out;
1042 		break;
1043 	default:
1044 		ipseclog((LOG_ERR, "%s: invalid direction=%u\n", __func__,
1045 			xpl->sadb_x_policy_dir));
1046 		return (EINVAL);
1047 	}
1048 
1049 	/* Sanity check. Should be an IPSEC_ASSERT. */
1050 	if (pcb_sp == NULL)
1051 		return (EINVAL);
1052 
1053 	*mp = key_sp2msg(pcb_sp);
1054 	if (!*mp) {
1055 		ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__));
1056 		return (ENOBUFS);
1057 	}
1058 
1059 	(*mp)->m_type = MT_DATA;
1060 	KEYDEBUG(KEYDEBUG_IPSEC_DUMP,
1061 		printf("%s:\n", __func__); kdebug_mbuf(*mp));
1062 
1063 	return (0);
1064 }
1065 
1066 /* Delete policy in PCB. */
1067 int
1068 ipsec_delete_pcbpolicy(struct inpcb *inp)
1069 {
1070 	IPSEC_ASSERT(inp != NULL, ("null inp"));
1071 
1072 	if (inp->inp_sp == NULL)
1073 		return (0);
1074 
1075 	if (inp->inp_sp->sp_in != NULL)
1076 		KEY_FREESP(&inp->inp_sp->sp_in);
1077 
1078 	if (inp->inp_sp->sp_out != NULL)
1079 		KEY_FREESP(&inp->inp_sp->sp_out);
1080 
1081 	ipsec_delpcbpolicy(inp->inp_sp);
1082 	inp->inp_sp = NULL;
1083 
1084 	return (0);
1085 }
1086 
1087 /*
1088  * Return current level.
1089  * Either IPSEC_LEVEL_USE or IPSEC_LEVEL_REQUIRE are always returned.
1090  */
1091 u_int
1092 ipsec_get_reqlevel(struct ipsecrequest *isr)
1093 {
1094 	u_int level = 0;
1095 	u_int esp_trans_deflev, esp_net_deflev;
1096 	u_int ah_trans_deflev, ah_net_deflev;
1097 
1098 	IPSEC_ASSERT(isr != NULL && isr->sp != NULL, ("null argument"));
1099 	IPSEC_ASSERT(isr->sp->spidx.src.sa.sa_family == isr->sp->spidx.dst.sa.sa_family,
1100 		("af family mismatch, src %u, dst %u",
1101 		 isr->sp->spidx.src.sa.sa_family,
1102 		 isr->sp->spidx.dst.sa.sa_family));
1103 
1104 /* XXX Note that we have ipseclog() expanded here - code sync issue. */
1105 #define IPSEC_CHECK_DEFAULT(lev) \
1106 	(((lev) != IPSEC_LEVEL_USE && (lev) != IPSEC_LEVEL_REQUIRE	      \
1107 			&& (lev) != IPSEC_LEVEL_UNIQUE)			      \
1108 		? (V_ipsec_debug						      \
1109 			? log(LOG_INFO, "fixed system default level " #lev ":%d->%d\n",\
1110 				(lev), IPSEC_LEVEL_REQUIRE)		      \
1111 			: 0),						      \
1112 			(lev) = IPSEC_LEVEL_REQUIRE,			      \
1113 			(lev)						      \
1114 		: (lev))
1115 
1116 	/* Set default level. */
1117 	switch (((struct sockaddr *)&isr->sp->spidx.src)->sa_family) {
1118 #ifdef INET
1119 	case AF_INET:
1120 		esp_trans_deflev = IPSEC_CHECK_DEFAULT(V_ip4_esp_trans_deflev);
1121 		esp_net_deflev = IPSEC_CHECK_DEFAULT(V_ip4_esp_net_deflev);
1122 		ah_trans_deflev = IPSEC_CHECK_DEFAULT(V_ip4_ah_trans_deflev);
1123 		ah_net_deflev = IPSEC_CHECK_DEFAULT(V_ip4_ah_net_deflev);
1124 		break;
1125 #endif
1126 #ifdef INET6
1127 	case AF_INET6:
1128 		esp_trans_deflev = IPSEC_CHECK_DEFAULT(V_ip6_esp_trans_deflev);
1129 		esp_net_deflev = IPSEC_CHECK_DEFAULT(V_ip6_esp_net_deflev);
1130 		ah_trans_deflev = IPSEC_CHECK_DEFAULT(V_ip6_ah_trans_deflev);
1131 		ah_net_deflev = IPSEC_CHECK_DEFAULT(V_ip6_ah_net_deflev);
1132 		break;
1133 #endif /* INET6 */
1134 	default:
1135 		panic("%s: unknown af %u",
1136 			__func__, isr->sp->spidx.src.sa.sa_family);
1137 	}
1138 
1139 #undef IPSEC_CHECK_DEFAULT
1140 
1141 	/* Set level. */
1142 	switch (isr->level) {
1143 	case IPSEC_LEVEL_DEFAULT:
1144 		switch (isr->saidx.proto) {
1145 		case IPPROTO_ESP:
1146 			if (isr->saidx.mode == IPSEC_MODE_TUNNEL)
1147 				level = esp_net_deflev;
1148 			else
1149 				level = esp_trans_deflev;
1150 			break;
1151 		case IPPROTO_AH:
1152 			if (isr->saidx.mode == IPSEC_MODE_TUNNEL)
1153 				level = ah_net_deflev;
1154 			else
1155 				level = ah_trans_deflev;
1156 			break;
1157 		case IPPROTO_IPCOMP:
1158 			/*
1159 			 * We don't really care, as IPcomp document says that
1160 			 * we shouldn't compress small packets.
1161 			 */
1162 			level = IPSEC_LEVEL_USE;
1163 			break;
1164 		default:
1165 			panic("%s: Illegal protocol defined %u\n", __func__,
1166 				isr->saidx.proto);
1167 		}
1168 		break;
1169 
1170 	case IPSEC_LEVEL_USE:
1171 	case IPSEC_LEVEL_REQUIRE:
1172 		level = isr->level;
1173 		break;
1174 	case IPSEC_LEVEL_UNIQUE:
1175 		level = IPSEC_LEVEL_REQUIRE;
1176 		break;
1177 
1178 	default:
1179 		panic("%s: Illegal IPsec level %u\n", __func__, isr->level);
1180 	}
1181 
1182 	return (level);
1183 }
1184 
1185 /*
1186  * Check security policy requirements against the actual
1187  * packet contents.  Return one if the packet should be
1188  * reject as "invalid"; otherwiser return zero to have the
1189  * packet treated as "valid".
1190  *
1191  * OUT:
1192  *	0: valid
1193  *	1: invalid
1194  */
1195 static int
1196 ipsec_in_reject(struct secpolicy *sp, struct mbuf *m)
1197 {
1198 	struct ipsecrequest *isr;
1199 	int need_auth;
1200 
1201 	KEYDEBUG(KEYDEBUG_IPSEC_DATA,
1202 		printf("%s: using SP\n", __func__); kdebug_secpolicy(sp));
1203 
1204 	/* Check policy. */
1205 	switch (sp->policy) {
1206 	case IPSEC_POLICY_DISCARD:
1207 		return (1);
1208 	case IPSEC_POLICY_BYPASS:
1209 	case IPSEC_POLICY_NONE:
1210 		return (0);
1211 	}
1212 
1213 	IPSEC_ASSERT(sp->policy == IPSEC_POLICY_IPSEC,
1214 		("invalid policy %u", sp->policy));
1215 
1216 	/* XXX Should compare policy against IPsec header history. */
1217 
1218 	need_auth = 0;
1219 	for (isr = sp->req; isr != NULL; isr = isr->next) {
1220 		if (ipsec_get_reqlevel(isr) != IPSEC_LEVEL_REQUIRE)
1221 			continue;
1222 		switch (isr->saidx.proto) {
1223 		case IPPROTO_ESP:
1224 			if ((m->m_flags & M_DECRYPTED) == 0) {
1225 				KEYDEBUG(KEYDEBUG_IPSEC_DUMP,
1226 				    printf("%s: ESP m_flags:%x\n", __func__,
1227 					    m->m_flags));
1228 				return (1);
1229 			}
1230 
1231 			if (!need_auth &&
1232 			    isr->sav != NULL &&
1233 			    isr->sav->tdb_authalgxform != NULL &&
1234 			    (m->m_flags & M_AUTHIPDGM) == 0) {
1235 				KEYDEBUG(KEYDEBUG_IPSEC_DUMP,
1236 				    printf("%s: ESP/AH m_flags:%x\n", __func__,
1237 					    m->m_flags));
1238 				return (1);
1239 			}
1240 			break;
1241 		case IPPROTO_AH:
1242 			need_auth = 1;
1243 			if ((m->m_flags & M_AUTHIPHDR) == 0) {
1244 				KEYDEBUG(KEYDEBUG_IPSEC_DUMP,
1245 				    printf("%s: AH m_flags:%x\n", __func__,
1246 					    m->m_flags));
1247 				return (1);
1248 			}
1249 			break;
1250 		case IPPROTO_IPCOMP:
1251 			/*
1252 			 * We don't really care, as IPcomp document
1253 			 * says that we shouldn't compress small
1254 			 * packets.  IPComp policy should always be
1255 			 * treated as being in "use" level.
1256 			 */
1257 			break;
1258 		}
1259 	}
1260 	return (0);		/* Valid. */
1261 }
1262 
1263 /*
1264  * Non zero return value means security policy DISCARD or policy violation.
1265  */
1266 static int
1267 ipsec46_in_reject(struct mbuf *m, struct inpcb *inp)
1268 {
1269 	struct secpolicy *sp;
1270 	int error;
1271 	int result;
1272 
1273 	IPSEC_ASSERT(m != NULL, ("null mbuf"));
1274 
1275 	/* Get SP for this packet. */
1276 	if (inp == NULL)
1277 		sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND, &error);
1278 	else
1279 		sp = ipsec_getpolicybysock(m, IPSEC_DIR_INBOUND, inp, &error);
1280 
1281 	if (sp != NULL) {
1282 		result = ipsec_in_reject(sp, m);
1283 		KEY_FREESP(&sp);
1284 	} else {
1285 		result = 1;	/* treat errors as policy violation */
1286 	}
1287 	return (result);
1288 }
1289 
1290 /*
1291  * Check AH/ESP integrity.
1292  * This function is called from tcp_input(), udp_input(),
1293  * and {ah,esp}4_input for tunnel mode.
1294  */
1295 int
1296 ipsec4_in_reject(struct mbuf *m, struct inpcb *inp)
1297 {
1298 	int result;
1299 
1300 	result = ipsec46_in_reject(m, inp);
1301 	if (result)
1302 		IPSECSTAT_INC(ips_in_polvio);
1303 
1304 	return (result);
1305 }
1306 
#ifdef INET6
/*
 * Check AH/ESP integrity.
 * This function is called from tcp6_input(), udp6_input(),
 * and {ah,esp}6_input for tunnel mode.
 * A non-zero result is also counted in the IPv6 ips_in_polvio statistic.
 */
int
ipsec6_in_reject(struct mbuf *m, struct inpcb *inp)
{
	int rejected;

	rejected = ipsec46_in_reject(m, inp);
	if (rejected != 0) {
		IPSEC6STAT_INC(ips_in_polvio);
	}

	return (rejected);
}
#endif
1325 
1326 /*
1327  * Compute the byte size to be occupied by IPsec header.
1328  * In case it is tunnelled, it includes the size of outer IP header.
1329  * NOTE: SP passed is freed in this function.
1330  */
1331 static size_t
1332 ipsec_hdrsiz_internal(struct secpolicy *sp)
1333 {
1334 	struct ipsecrequest *isr;
1335 	size_t size;
1336 
1337 	KEYDEBUG(KEYDEBUG_IPSEC_DATA,
1338 		printf("%s: using SP\n", __func__); kdebug_secpolicy(sp));
1339 
1340 	switch (sp->policy) {
1341 	case IPSEC_POLICY_DISCARD:
1342 	case IPSEC_POLICY_BYPASS:
1343 	case IPSEC_POLICY_NONE:
1344 		return (0);
1345 	}
1346 
1347 	IPSEC_ASSERT(sp->policy == IPSEC_POLICY_IPSEC,
1348 		("invalid policy %u", sp->policy));
1349 
1350 	size = 0;
1351 	for (isr = sp->req; isr != NULL; isr = isr->next) {
1352 		size_t clen = 0;
1353 
1354 		switch (isr->saidx.proto) {
1355 		case IPPROTO_ESP:
1356 			clen = esp_hdrsiz(isr->sav);
1357 			break;
1358 		case IPPROTO_AH:
1359 			clen = ah_hdrsiz(isr->sav);
1360 			break;
1361 		case IPPROTO_IPCOMP:
1362 			clen = sizeof(struct ipcomp);
1363 			break;
1364 		}
1365 
1366 		if (isr->saidx.mode == IPSEC_MODE_TUNNEL) {
1367 			switch (isr->saidx.dst.sa.sa_family) {
1368 			case AF_INET:
1369 				clen += sizeof(struct ip);
1370 				break;
1371 #ifdef INET6
1372 			case AF_INET6:
1373 				clen += sizeof(struct ip6_hdr);
1374 				break;
1375 #endif
1376 			default:
1377 				ipseclog((LOG_ERR, "%s: unknown AF %d in "
1378 				    "IPsec tunnel SA\n", __func__,
1379 				    ((struct sockaddr *)&isr->saidx.dst)->sa_family));
1380 				break;
1381 			}
1382 		}
1383 		size += clen;
1384 	}
1385 
1386 	return (size);
1387 }
1388 
1389 /*
1390  * This function is called from ipsec_hdrsiz_tcp(), ip_ipsec_mtu(),
1391  * disabled ip6_ipsec_mtu() and ip6_forward().
1392  */
1393 size_t
1394 ipsec_hdrsiz(struct mbuf *m, u_int dir, struct inpcb *inp)
1395 {
1396 	struct secpolicy *sp;
1397 	int error;
1398 	size_t size;
1399 
1400 	IPSEC_ASSERT(m != NULL, ("null mbuf"));
1401 
1402 	/* Get SP for this packet. */
1403 	if (inp == NULL)
1404 		sp = ipsec_getpolicybyaddr(m, dir, &error);
1405 	else
1406 		sp = ipsec_getpolicybysock(m, dir, inp, &error);
1407 
1408 	if (sp != NULL) {
1409 		size = ipsec_hdrsiz_internal(sp);
1410 		KEYDEBUG(KEYDEBUG_IPSEC_DATA,
1411 			printf("%s: size:%lu.\n", __func__,
1412 				(unsigned long)size));
1413 
1414 		KEY_FREESP(&sp);
1415 	} else {
1416 		size = 0;	/* XXX Should be panic?
1417 				 * -> No, we are called w/o knowing if
1418 				 *    IPsec processing is needed. */
1419 	}
1420 	return (size);
1421 }
1422 
1423 /*
1424  * Check the variable replay window.
1425  * ipsec_chkreplay() performs replay check before ICV verification.
1426  * ipsec_updatereplay() updates replay bitmap.  This must be called after
1427  * ICV verification (it also performs replay check, which is usually done
1428  * beforehand).
1429  * 0 (zero) is returned if packet disallowed, 1 if packet permitted.
1430  *
1431  * Based on RFC 2401.
1432  */
1433 int
1434 ipsec_chkreplay(u_int32_t seq, struct secasvar *sav)
1435 {
1436 	const struct secreplay *replay;
1437 	u_int32_t diff;
1438 	int fr;
1439 	u_int32_t wsizeb;	/* Constant: bits of window size. */
1440 	int frlast;		/* Constant: last frame. */
1441 
1442 	IPSEC_ASSERT(sav != NULL, ("Null SA"));
1443 	IPSEC_ASSERT(sav->replay != NULL, ("Null replay state"));
1444 
1445 	replay = sav->replay;
1446 
1447 	if (replay->wsize == 0)
1448 		return (1);	/* No need to check replay. */
1449 
1450 	/* Constant. */
1451 	frlast = replay->wsize - 1;
1452 	wsizeb = replay->wsize << 3;
1453 
1454 	/* Sequence number of 0 is invalid. */
1455 	if (seq == 0)
1456 		return (0);
1457 
1458 	/* First time is always okay. */
1459 	if (replay->count == 0)
1460 		return (1);
1461 
1462 	if (seq > replay->lastseq) {
1463 		/* Larger sequences are okay. */
1464 		return (1);
1465 	} else {
1466 		/* seq is equal or less than lastseq. */
1467 		diff = replay->lastseq - seq;
1468 
1469 		/* Over range to check, i.e. too old or wrapped. */
1470 		if (diff >= wsizeb)
1471 			return (0);
1472 
1473 		fr = frlast - diff / 8;
1474 
1475 		/* This packet already seen? */
1476 		if ((replay->bitmap)[fr] & (1 << (diff % 8)))
1477 			return (0);
1478 
1479 		/* Out of order but good. */
1480 		return (1);
1481 	}
1482 }
1483 
1484 /*
1485  * Check replay counter whether to update or not.
1486  * OUT:	0:	OK
1487  *	1:	NG
1488  */
1489 int
1490 ipsec_updatereplay(u_int32_t seq, struct secasvar *sav)
1491 {
1492 	char buf[128];
1493 	struct secreplay *replay;
1494 	u_int32_t diff;
1495 	int fr;
1496 	u_int32_t wsizeb;	/* Constant: bits of window size. */
1497 	int frlast;		/* Constant: last frame. */
1498 
1499 	IPSEC_ASSERT(sav != NULL, ("Null SA"));
1500 	IPSEC_ASSERT(sav->replay != NULL, ("Null replay state"));
1501 
1502 	replay = sav->replay;
1503 
1504 	if (replay->wsize == 0)
1505 		goto ok;	/* No need to check replay. */
1506 
1507 	/* Constant. */
1508 	frlast = replay->wsize - 1;
1509 	wsizeb = replay->wsize << 3;
1510 
1511 	/* Sequence number of 0 is invalid. */
1512 	if (seq == 0)
1513 		return (1);
1514 
1515 	/* First time. */
1516 	if (replay->count == 0) {
1517 		replay->lastseq = seq;
1518 		bzero(replay->bitmap, replay->wsize);
1519 		(replay->bitmap)[frlast] = 1;
1520 		goto ok;
1521 	}
1522 
1523 	if (seq > replay->lastseq) {
1524 		/* seq is larger than lastseq. */
1525 		diff = seq - replay->lastseq;
1526 
1527 		/* New larger sequence number. */
1528 		if (diff < wsizeb) {
1529 			/* In window. */
1530 			/* Set bit for this packet. */
1531 			vshiftl(replay->bitmap, diff, replay->wsize);
1532 			(replay->bitmap)[frlast] |= 1;
1533 		} else {
1534 			/* This packet has a "way larger". */
1535 			bzero(replay->bitmap, replay->wsize);
1536 			(replay->bitmap)[frlast] = 1;
1537 		}
1538 		replay->lastseq = seq;
1539 
1540 		/* Larger is good. */
1541 	} else {
1542 		/* seq is equal or less than lastseq. */
1543 		diff = replay->lastseq - seq;
1544 
1545 		/* Over range to check, i.e. too old or wrapped. */
1546 		if (diff >= wsizeb)
1547 			return (1);
1548 
1549 		fr = frlast - diff / 8;
1550 
1551 		/* This packet already seen? */
1552 		if ((replay->bitmap)[fr] & (1 << (diff % 8)))
1553 			return (1);
1554 
1555 		/* Mark as seen. */
1556 		(replay->bitmap)[fr] |= (1 << (diff % 8));
1557 
1558 		/* Out of order but good. */
1559 	}
1560 
1561 ok:
1562 	if (replay->count == ~0) {
1563 
1564 		/* Set overflow flag. */
1565 		replay->overflow++;
1566 
1567 		/* Don't increment, no more packets accepted. */
1568 		if ((sav->flags & SADB_X_EXT_CYCSEQ) == 0)
1569 			return (1);
1570 
1571 		ipseclog((LOG_WARNING, "%s: replay counter made %d cycle. %s\n",
1572 		    __func__, replay->overflow,
1573 		    ipsec_logsastr(sav, buf, sizeof(buf))));
1574 	}
1575 
1576 	replay->count++;
1577 
1578 	return (0);
1579 }
1580 
/*
 * Shift a variable length buffer to the left, treating it as one big
 * number whose most significant byte is bitmap[0].  Bits shifted out of
 * bitmap[0] are lost and zeros enter at the end of the buffer.
 * IN:	bitmap: pointer to the buffer
 *	nbit:	the number of bits to shift.
 *	wsize:	buffer size (bytes).
 */
static void
vshiftl(unsigned char *bitmap, int nbit, int wsize)
{
	int remaining, step, idx;
	unsigned char carry;

	/* Shift in chunks of at most 8 bits until nbit bits are done. */
	for (remaining = nbit; remaining > 0; remaining -= step) {
		step = (remaining < 8) ? remaining : 8;
		bitmap[0] <<= step;
		for (idx = 1; idx < wsize; idx++) {
			/* The top bits of this byte move to the previous one. */
			carry = (bitmap[idx] >> (8 - step));
			bitmap[idx] <<= step;
			bitmap[idx - 1] |= carry;
		}
	}
}
1603 
1604 /* Return a printable string for the address. */
1605 char*
1606 ipsec_address(union sockaddr_union* sa, char *buf, socklen_t size)
1607 {
1608 
1609 	switch (sa->sa.sa_family) {
1610 #ifdef INET
1611 	case AF_INET:
1612 		return (inet_ntop(AF_INET, &sa->sin.sin_addr, buf, size));
1613 #endif /* INET */
1614 #ifdef INET6
1615 	case AF_INET6:
1616 		return (inet_ntop(AF_INET6, &sa->sin6.sin6_addr, buf, size));
1617 #endif /* INET6 */
1618 	default:
1619 		return ("(unknown address family)");
1620 	}
1621 }
1622 
1623 char *
1624 ipsec_logsastr(struct secasvar *sav, char *buf, size_t size)
1625 {
1626 	char sbuf[INET6_ADDRSTRLEN], dbuf[INET6_ADDRSTRLEN];
1627 
1628 	IPSEC_ASSERT(sav->sah->saidx.src.sa.sa_family ==
1629 	    sav->sah->saidx.dst.sa.sa_family, ("address family mismatch"));
1630 
1631 	snprintf(buf, size, "SA(SPI=%08lx src=%s dst=%s)",
1632 	    (u_long)ntohl(sav->spi),
1633 	    ipsec_address(&sav->sah->saidx.src, sbuf, sizeof(sbuf)),
1634 	    ipsec_address(&sav->sah->saidx.dst, dbuf, sizeof(dbuf)));
1635 	return (buf);
1636 }
1637 
1638 void
1639 ipsec_dumpmbuf(struct mbuf *m)
1640 {
1641 	int totlen;
1642 	int i;
1643 	u_char *p;
1644 
1645 	totlen = 0;
1646 	printf("---\n");
1647 	while (m) {
1648 		p = mtod(m, u_char *);
1649 		for (i = 0; i < m->m_len; i++) {
1650 			printf("%02x ", p[i]);
1651 			totlen++;
1652 			if (totlen % 16 == 0)
1653 				printf("\n");
1654 		}
1655 		m = m->m_next;
1656 	}
1657 	if (totlen % 16 != 0)
1658 		printf("\n");
1659 	printf("---\n");
1660 }
1661 
1662 static void
1663 def_policy_init(const void *unused __unused)
1664 {
1665 
1666 	bzero(&V_def_policy, sizeof(struct secpolicy));
1667 	V_def_policy.policy = IPSEC_POLICY_NONE;
1668 	V_def_policy.refcnt = 1;
1669 }
1670 VNET_SYSINIT(def_policy_init, SI_SUB_PROTO_DOMAININIT, SI_ORDER_ANY,
1671     def_policy_init, NULL);
1672 
1673 
1674 /* XXX This stuff doesn't belong here... */
1675 
1676 static	struct xformsw* xforms = NULL;
1677 
1678 /*
1679  * Register a transform; typically at system startup.
1680  */
1681 void
1682 xform_register(struct xformsw* xsp)
1683 {
1684 
1685 	xsp->xf_next = xforms;
1686 	xforms = xsp;
1687 }
1688 
1689 /*
1690  * Initialize transform support in an sav.
1691  */
1692 int
1693 xform_init(struct secasvar *sav, int xftype)
1694 {
1695 	struct xformsw *xsp;
1696 
1697 	if (sav->tdb_xform != NULL)	/* Previously initialized. */
1698 		return (0);
1699 	for (xsp = xforms; xsp; xsp = xsp->xf_next)
1700 		if (xsp->xf_type == xftype)
1701 			return ((*xsp->xf_init)(sav, xsp));
1702 	return (EINVAL);
1703 }
1704