xref: /titanic_51/usr/src/uts/common/inet/ip/spd.c (revision 83fcdc8cfa9b16b358b13c5dd920d71bbaf4a8b5)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * IPsec Security Policy Database.
31  *
32  * This module maintains the SPD and provides routines used by ip and ip6
33  * to apply IPsec policy to inbound and outbound datagrams.
34  */
35 
36 #include <sys/types.h>
37 #include <sys/stream.h>
38 #include <sys/stropts.h>
39 #include <sys/sysmacros.h>
40 #include <sys/strsubr.h>
41 #include <sys/strlog.h>
42 #include <sys/cmn_err.h>
43 #include <sys/zone.h>
44 
45 #include <sys/systm.h>
46 #include <sys/param.h>
47 #include <sys/kmem.h>
48 
49 #include <sys/crypto/api.h>
50 
51 #include <inet/common.h>
52 #include <inet/mi.h>
53 
54 #include <netinet/ip6.h>
55 #include <netinet/icmp6.h>
56 #include <netinet/udp.h>
57 
58 #include <inet/ip.h>
59 #include <inet/ip6.h>
60 
61 #include <net/pfkeyv2.h>
62 #include <net/pfpolicy.h>
63 #include <inet/ipsec_info.h>
64 #include <inet/sadb.h>
65 #include <inet/ipsec_impl.h>
66 #include <inet/ipsecah.h>
67 #include <inet/ipsecesp.h>
68 #include <inet/ipdrop.h>
69 #include <inet/ipclassifier.h>
70 
71 static void ipsec_update_present_flags();
72 static ipsec_act_t *ipsec_act_wildcard_expand(ipsec_act_t *, uint_t *);
73 static void ipsec_out_free(void *);
74 static void ipsec_in_free(void *);
75 static boolean_t ipsec_init_inbound_sel(ipsec_selector_t *, mblk_t *,
76     ipha_t *, ip6_t *);
77 static mblk_t *ipsec_attach_global_policy(mblk_t *, conn_t *,
78     ipsec_selector_t *);
79 static mblk_t *ipsec_apply_global_policy(mblk_t *, conn_t *,
80     ipsec_selector_t *);
81 static mblk_t *ipsec_check_ipsecin_policy(queue_t *, mblk_t *,
82     ipsec_policy_t *, ipha_t *, ip6_t *);
83 static void ipsec_in_release_refs(ipsec_in_t *);
84 static void ipsec_out_release_refs(ipsec_out_t *);
85 static void ipsec_action_reclaim(void *);
86 static void ipsid_init(void);
87 static void ipsid_fini(void);
88 static boolean_t ipsec_check_ipsecin_action(struct ipsec_in_s *, mblk_t *,
89     struct ipsec_action_s *, ipha_t *ipha, ip6_t *ip6h, const char **,
90     kstat_named_t **);
91 static int32_t ipsec_act_ovhd(const ipsec_act_t *act);
92 static void ipsec_unregister_prov_update(void);
93 static boolean_t ipsec_compare_action(ipsec_policy_t *, ipsec_policy_t *);
94 static uint32_t selector_hash(ipsec_selector_t *);
95 
96 /*
97  * Policy rule index generator.  We assume this won't wrap in the
98  * lifetime of a system.  If we make 2^20 policy changes per second,
99  * this will last 2^44 seconds, or roughly 500,000 years, so we don't
100  * have to worry about reusing policy index values.
101  *
102  * Protected by ipsec_conf_lock.
103  */
104 uint64_t	ipsec_next_policy_index = 1;
105 
106 /*
107  * Active & Inactive system policy roots
108  */
109 static ipsec_policy_head_t system_policy;
110 static ipsec_policy_head_t inactive_policy;
111 
112 /* Packet dropper for generic SPD drops. */
113 static ipdropper_t spd_dropper;
114 
/*
 * For now, use a trivially sized hash table for actions.
 * In the future we can add the structure canonicalization necessary
 * to get the hash function to behave correctly.
 */
120 #define	IPSEC_ACTION_HASH_SIZE 1
121 
/*
 * The selector hash table is statically sized at module load time.
 * We default to 251 buckets, which is the largest prime number below 255.
 */
126 
127 #define	IPSEC_SPDHASH_DEFAULT 251
128 uint32_t ipsec_spd_hashsize = 0;
129 
130 #define	IPSEC_SEL_NOHASH ((uint32_t)(~0))
131 
132 static HASH_HEAD(ipsec_action_s) ipsec_action_hash[IPSEC_ACTION_HASH_SIZE];
133 static HASH_HEAD(ipsec_sel) *ipsec_sel_hash;
134 
135 static kmem_cache_t *ipsec_action_cache;
136 static kmem_cache_t *ipsec_sel_cache;
137 static kmem_cache_t *ipsec_pol_cache;
138 static kmem_cache_t *ipsec_info_cache;
139 
140 boolean_t ipsec_inbound_v4_policy_present = B_FALSE;
141 boolean_t ipsec_outbound_v4_policy_present = B_FALSE;
142 boolean_t ipsec_inbound_v6_policy_present = B_FALSE;
143 boolean_t ipsec_outbound_v6_policy_present = B_FALSE;
144 
145 /*
146  * Because policy needs to know what algorithms are supported, keep the
147  * lists of algorithms here.
148  */
149 
150 kmutex_t alg_lock;
151 uint8_t ipsec_nalgs[IPSEC_NALGTYPES];
152 ipsec_alginfo_t *ipsec_alglists[IPSEC_NALGTYPES][IPSEC_MAX_ALGS];
153 uint8_t ipsec_sortlist[IPSEC_NALGTYPES][IPSEC_MAX_ALGS];
154 ipsec_algs_exec_mode_t ipsec_algs_exec_mode[IPSEC_NALGTYPES];
155 static crypto_notify_handle_t prov_update_handle = NULL;
156 
157 int ipsec_hdr_pullup_needed = 0;
158 int ipsec_weird_null_inbound_policy = 0;
159 
160 #define	ALGBITS_ROUND_DOWN(x, align)	(((x)/(align))*(align))
161 #define	ALGBITS_ROUND_UP(x, align)	ALGBITS_ROUND_DOWN((x)+(align)-1, align)
162 
163 /*
164  * Inbound traffic should have matching identities for both SA's.
165  */
166 
167 #define	SA_IDS_MATCH(sa1, sa2) 						\
168 	(((sa1) == NULL) || ((sa2) == NULL) ||				\
169 	(((sa1)->ipsa_src_cid == (sa2)->ipsa_src_cid) &&		\
170 	    (((sa1)->ipsa_dst_cid == (sa2)->ipsa_dst_cid))))
171 
/*
 * Unlink policy rule `ip' from its hash chain (via its ipsp_hash linkage),
 * remove it from the iph_rulebyid AVL tree of policy head `php', and then
 * invoke IPPOL_REFRELE() on it.
 *
 * NOTE(review): this expands to three separate statements and already ends
 * in a semicolon, so it must only be used where a full statement list is
 * legal -- e.g. never as the unbraced body of an if/for/while.
 */
#define	IPPOL_UNCHAIN(php, ip) 						\
	HASHLIST_UNCHAIN((ip), ipsp_hash);				\
	avl_remove(&(php)->iph_rulebyid, (ip));				\
	IPPOL_REFRELE(ip);
176 
177 /*
178  * Policy failure messages.
179  */
180 static char *ipsec_policy_failure_msgs[] = {
181 
182 	/* IPSEC_POLICY_NOT_NEEDED */
183 	"%s: Dropping the datagram because the incoming packet "
184 	"is %s, but the recipient expects clear; Source %s, "
185 	"Destination %s.\n",
186 
187 	/* IPSEC_POLICY_MISMATCH */
188 	"%s: Policy Failure for the incoming packet (%s); Source %s, "
189 	"Destination %s.\n",
190 
191 	/* IPSEC_POLICY_AUTH_NOT_NEEDED	*/
192 	"%s: Authentication present while not expected in the "
193 	"incoming %s packet; Source %s, Destination %s.\n",
194 
195 	/* IPSEC_POLICY_ENCR_NOT_NEEDED */
196 	"%s: Encryption present while not expected in the "
197 	"incoming %s packet; Source %s, Destination %s.\n",
198 
199 	/* IPSEC_POLICY_SE_NOT_NEEDED */
200 	"%s: Self-Encapsulation present while not expected in the "
201 	"incoming %s packet; Source %s, Destination %s.\n",
202 };
203 /*
204  * Have a counter for every possible policy message in the previous array.
205  */
206 static uint32_t ipsec_policy_failure_count[IPSEC_POLICY_MAX];
207 /* Time since last ipsec policy failure that printed a message. */
208 hrtime_t ipsec_policy_failure_last = 0;
209 
210 /*
211  * General overviews:
212  *
213  * Locking:
214  *
215  *	All of the system policy structures are protected by a single
216  *	rwlock, ipsec_conf_lock.  These structures are threaded in a
217  *	fairly complex fashion and are not expected to change on a
218  *	regular basis, so this should not cause scaling/contention
219  *	problems.  As a result, policy checks should (hopefully) be MT-hot.
220  *
221  * Allocation policy:
222  *
223  *	We use custom kmem cache types for the various
224  *	bits & pieces of the policy data structures.  All allocations
225  *	use KM_NOSLEEP instead of KM_SLEEP for policy allocation.  The
226  *	policy table is of potentially unbounded size, so we don't
227  *	want to provide a way to hog all system memory with policy
228  *	entries..
229  */
230 
231 
232 /*
233  * AVL tree comparison function.
234  * the in-kernel avl assumes unique keys for all objects.
235  * Since sometimes policy will duplicate rules, we may insert
236  * multiple rules with the same rule id, so we need a tie-breaker.
237  */
238 static int
239 ipsec_policy_cmpbyid(const void *a, const void *b)
240 {
241 	const ipsec_policy_t *ipa, *ipb;
242 	uint64_t idxa, idxb;
243 
244 	ipa = (const ipsec_policy_t *)a;
245 	ipb = (const ipsec_policy_t *)b;
246 	idxa = ipa->ipsp_index;
247 	idxb = ipb->ipsp_index;
248 
249 	if (idxa < idxb)
250 		return (-1);
251 	if (idxa > idxb)
252 		return (1);
253 	/*
254 	 * Tie-breaker #1: All installed policy rules have a non-NULL
255 	 * ipsl_sel (selector set), so an entry with a NULL ipsp_sel is not
256 	 * actually in-tree but rather a template node being used in
257 	 * an avl_find query; see ipsec_policy_delete().  This gives us
258 	 * a placeholder in the ordering just before the the first entry with
259 	 * a key >= the one we're looking for, so we can walk forward from
260 	 * that point to get the remaining entries with the same id.
261 	 */
262 	if ((ipa->ipsp_sel == NULL) && (ipb->ipsp_sel != NULL))
263 		return (-1);
264 	if ((ipb->ipsp_sel == NULL) && (ipa->ipsp_sel != NULL))
265 		return (1);
266 	/*
267 	 * At most one of the arguments to the comparison should have a
268 	 * NULL selector pointer; if not, the tree is broken.
269 	 */
270 	ASSERT(ipa->ipsp_sel != NULL);
271 	ASSERT(ipb->ipsp_sel != NULL);
272 	/*
273 	 * Tie-breaker #2: use the virtual address of the policy node
274 	 * to arbitrarily break ties.  Since we use the new tree node in
275 	 * the avl_find() in ipsec_insert_always, the new node will be
276 	 * inserted into the tree in the right place in the sequence.
277 	 */
278 	if (ipa < ipb)
279 		return (-1);
280 	if (ipa > ipb)
281 		return (1);
282 	return (0);
283 }
284 
285 static void
286 ipsec_polhead_free_table(ipsec_policy_head_t *iph)
287 {
288 	int dir, nchains;
289 
290 	nchains = ipsec_spd_hashsize;
291 
292 	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
293 		ipsec_policy_root_t *ipr = &iph->iph_root[dir];
294 
295 		if (ipr->ipr_hash == NULL)
296 			continue;
297 
298 		kmem_free(ipr->ipr_hash, nchains *
299 		    sizeof (ipsec_policy_hash_t));
300 	}
301 }
302 
303 static void
304 ipsec_polhead_destroy(ipsec_policy_head_t *iph)
305 {
306 	int dir;
307 
308 	avl_destroy(&iph->iph_rulebyid);
309 	rw_destroy(&iph->iph_lock);
310 
311 	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
312 		ipsec_policy_root_t *ipr = &iph->iph_root[dir];
313 		int nchains = ipr->ipr_nchains;
314 		int chain;
315 
316 		for (chain = 0; chain < nchains; chain++)
317 			mutex_destroy(&(ipr->ipr_hash[chain].hash_lock));
318 
319 	}
320 	ipsec_polhead_free_table(iph);
321 }
322 
323 /*
324  * Module unload hook.
325  */
326 void
327 ipsec_policy_destroy(void)
328 {
329 	int i;
330 
331 	ip_drop_unregister(&spd_dropper);
332 	ip_drop_destroy();
333 
334 	ipsec_polhead_destroy(&system_policy);
335 	ipsec_polhead_destroy(&inactive_policy);
336 
337 	for (i = 0; i < IPSEC_ACTION_HASH_SIZE; i++)
338 		mutex_destroy(&(ipsec_action_hash[i].hash_lock));
339 
340 	for (i = 0; i < ipsec_spd_hashsize; i++)
341 		mutex_destroy(&(ipsec_sel_hash[i].hash_lock));
342 
343 	ipsec_unregister_prov_update();
344 
345 	mutex_destroy(&alg_lock);
346 
347 	kmem_cache_destroy(ipsec_action_cache);
348 	kmem_cache_destroy(ipsec_sel_cache);
349 	kmem_cache_destroy(ipsec_pol_cache);
350 	kmem_cache_destroy(ipsec_info_cache);
351 	ipsid_gc();
352 	ipsid_fini();
353 }
354 
355 
356 /*
357  * Called when table allocation fails to free the table.
358  */
359 static int
360 ipsec_alloc_tables_failed()
361 {
362 	if (ipsec_sel_hash != NULL) {
363 		kmem_free(ipsec_sel_hash, ipsec_spd_hashsize *
364 		    sizeof (*ipsec_sel_hash));
365 		ipsec_sel_hash = NULL;
366 	}
367 	ipsec_polhead_free_table(&system_policy);
368 	ipsec_polhead_free_table(&inactive_policy);
369 
370 	return (ENOMEM);
371 }
372 
373 /*
374  * Attempt to allocate the tables in a single policy head.
375  * Return nonzero on failure after cleaning up any work in progress.
376  */
377 static int
378 ipsec_alloc_table(ipsec_policy_head_t *iph, int kmflag)
379 {
380 	int dir, nchains;
381 
382 	nchains = ipsec_spd_hashsize;
383 
384 	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
385 		ipsec_policy_root_t *ipr = &iph->iph_root[dir];
386 
387 		ipr->ipr_hash = kmem_zalloc(nchains *
388 		    sizeof (ipsec_policy_hash_t), kmflag);
389 		if (ipr->ipr_hash == NULL)
390 			return (ipsec_alloc_tables_failed());
391 	}
392 	return (0);
393 }
394 
395 /*
396  * Attempt to allocate the various tables.  Return nonzero on failure
397  * after cleaning up any work in progress.
398  */
399 static int
400 ipsec_alloc_tables(int kmflag)
401 {
402 	int error;
403 
404 	error = ipsec_alloc_table(&system_policy, kmflag);
405 	if (error != 0)
406 		return (error);
407 
408 	error = ipsec_alloc_table(&inactive_policy, kmflag);
409 	if (error != 0)
410 		return (error);
411 
412 	ipsec_sel_hash = kmem_zalloc(ipsec_spd_hashsize *
413 	    sizeof (*ipsec_sel_hash), kmflag);
414 
415 	if (ipsec_sel_hash == NULL)
416 		return (ipsec_alloc_tables_failed());
417 
418 	return (0);
419 }
420 
421 /*
422  * After table allocation, initialize a policy head.
423  */
424 static void
425 ipsec_polhead_init(ipsec_policy_head_t *iph)
426 {
427 	int dir, chain, nchains;
428 
429 	nchains = ipsec_spd_hashsize;
430 
431 	rw_init(&iph->iph_lock, NULL, RW_DEFAULT, NULL);
432 	avl_create(&iph->iph_rulebyid, ipsec_policy_cmpbyid,
433 	    sizeof (ipsec_policy_t), offsetof(ipsec_policy_t, ipsp_byid));
434 
435 	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
436 		ipsec_policy_root_t *ipr = &iph->iph_root[dir];
437 		ipr->ipr_nchains = nchains;
438 
439 		for (chain = 0; chain < nchains; chain++) {
440 			mutex_init(&(ipr->ipr_hash[chain].hash_lock),
441 			    NULL, MUTEX_DEFAULT, NULL);
442 		}
443 	}
444 }
445 
446 /*
447  * Module load hook.
448  */
449 void
450 ipsec_policy_init()
451 {
452 	int i;
453 
454 	/*
455 	 * Make two attempts to allocate policy hash tables; try it at
456 	 * the "preferred" size (may be set in /etc/system) first,
457 	 * then fall back to the default size.
458 	 */
459 	if (ipsec_spd_hashsize == 0)
460 		ipsec_spd_hashsize = IPSEC_SPDHASH_DEFAULT;
461 
462 	if (ipsec_alloc_tables(KM_NOSLEEP) != 0) {
463 		cmn_err(CE_WARN,
464 		    "Unable to allocate %d entry IPsec policy hash table",
465 		    ipsec_spd_hashsize);
466 		ipsec_spd_hashsize = IPSEC_SPDHASH_DEFAULT;
467 		cmn_err(CE_WARN, "Falling back to %d entries",
468 		    ipsec_spd_hashsize);
469 		(void) ipsec_alloc_tables(KM_SLEEP);
470 	}
471 
472 	ipsid_init();
473 	ipsec_polhead_init(&system_policy);
474 	ipsec_polhead_init(&inactive_policy);
475 
476 	for (i = 0; i < IPSEC_ACTION_HASH_SIZE; i++)
477 		mutex_init(&(ipsec_action_hash[i].hash_lock),
478 		    NULL, MUTEX_DEFAULT, NULL);
479 
480 	for (i = 0; i < ipsec_spd_hashsize; i++)
481 		mutex_init(&(ipsec_sel_hash[i].hash_lock),
482 		    NULL, MUTEX_DEFAULT, NULL);
483 
484 	mutex_init(&alg_lock, NULL, MUTEX_DEFAULT, NULL);
485 
486 	for (i = 0; i < IPSEC_NALGTYPES; i++)
487 		ipsec_nalgs[i] = 0;
488 
489 	ipsec_action_cache = kmem_cache_create("ipsec_actions",
490 	    sizeof (ipsec_action_t), _POINTER_ALIGNMENT, NULL, NULL,
491 	    ipsec_action_reclaim, NULL, NULL, 0);
492 	ipsec_sel_cache = kmem_cache_create("ipsec_selectors",
493 	    sizeof (ipsec_sel_t), _POINTER_ALIGNMENT, NULL, NULL,
494 	    NULL, NULL, NULL, 0);
495 	ipsec_pol_cache = kmem_cache_create("ipsec_policy",
496 	    sizeof (ipsec_policy_t), _POINTER_ALIGNMENT, NULL, NULL,
497 	    NULL, NULL, NULL, 0);
498 	ipsec_info_cache = kmem_cache_create("ipsec_info",
499 	    sizeof (ipsec_info_t), _POINTER_ALIGNMENT, NULL, NULL,
500 	    NULL, NULL, NULL, 0);
501 
502 	ip_drop_init();
503 	ip_drop_register(&spd_dropper, "IPsec SPD");
504 }
505 
506 /*
507  * Sort algorithm lists.
508  *
509  * I may need to split this based on
510  * authentication/encryption, and I may wish to have an administrator
511  * configure this list.  Hold on to some NDD variables...
512  *
513  * XXX For now, sort on minimum key size (GAG!).  While minimum key size is
514  * not the ideal metric, it's the only quantifiable measure available.
515  * We need a better metric for sorting algorithms by preference.
516  */
517 static void
518 alg_insert_sortlist(enum ipsec_algtype at, uint8_t algid)
519 {
520 	ipsec_alginfo_t *ai = ipsec_alglists[at][algid];
521 	uint8_t holder, swap;
522 	uint_t i;
523 	uint_t count = ipsec_nalgs[at];
524 	ASSERT(ai != NULL);
525 	ASSERT(algid == ai->alg_id);
526 
527 	ASSERT(MUTEX_HELD(&alg_lock));
528 
529 	holder = algid;
530 
531 	for (i = 0; i < count - 1; i++) {
532 		ipsec_alginfo_t *alt;
533 
534 		alt = ipsec_alglists[at][ipsec_sortlist[at][i]];
535 		/*
536 		 * If you want to give precedence to newly added algs,
537 		 * add the = in the > comparison.
538 		 */
539 		if ((holder != algid) || (ai->alg_minbits > alt->alg_minbits)) {
540 			/* Swap sortlist[i] and holder. */
541 			swap = ipsec_sortlist[at][i];
542 			ipsec_sortlist[at][i] = holder;
543 			holder = swap;
544 			ai = alt;
545 		} /* Else just continue. */
546 	}
547 
548 	/* Store holder in last slot. */
549 	ipsec_sortlist[at][i] = holder;
550 }
551 
552 /*
553  * Remove an algorithm from a sorted algorithm list.
554  * This should be considerably easier, even with complex sorting.
555  */
556 static void
557 alg_remove_sortlist(enum ipsec_algtype at, uint8_t algid)
558 {
559 	boolean_t copyback = B_FALSE;
560 	int i;
561 	int newcount = ipsec_nalgs[at];
562 
563 	ASSERT(MUTEX_HELD(&alg_lock));
564 
565 	for (i = 0; i <= newcount; i++) {
566 		if (copyback)
567 			ipsec_sortlist[at][i-1] = ipsec_sortlist[at][i];
568 		else if (ipsec_sortlist[at][i] == algid)
569 			copyback = B_TRUE;
570 	}
571 }
572 
573 /*
574  * Add the specified algorithm to the algorithm tables.
575  * Must be called while holding the algorithm table writer lock.
576  */
577 void
578 ipsec_alg_reg(ipsec_algtype_t algtype, ipsec_alginfo_t *alg)
579 {
580 	ASSERT(MUTEX_HELD(&alg_lock));
581 
582 	ASSERT(ipsec_alglists[algtype][alg->alg_id] == NULL);
583 	ipsec_alg_fix_min_max(alg, algtype);
584 	ipsec_alglists[algtype][alg->alg_id] = alg;
585 
586 	ipsec_nalgs[algtype]++;
587 	alg_insert_sortlist(algtype, alg->alg_id);
588 }
589 
590 /*
591  * Remove the specified algorithm from the algorithm tables.
592  * Must be called while holding the algorithm table writer lock.
593  */
594 void
595 ipsec_alg_unreg(ipsec_algtype_t algtype, uint8_t algid)
596 {
597 	ASSERT(MUTEX_HELD(&alg_lock));
598 
599 	ASSERT(ipsec_alglists[algtype][algid] != NULL);
600 	ipsec_alg_free(ipsec_alglists[algtype][algid]);
601 	ipsec_alglists[algtype][algid] = NULL;
602 
603 	ipsec_nalgs[algtype]--;
604 	alg_remove_sortlist(algtype, algid);
605 }
606 
607 /*
608  * Hooks for spdsock to get a grip on system policy.
609  */
610 
611 ipsec_policy_head_t *
612 ipsec_system_policy(void)
613 {
614 	ipsec_policy_head_t *h = &system_policy;
615 	IPPH_REFHOLD(h);
616 	return (h);
617 }
618 
619 ipsec_policy_head_t *
620 ipsec_inactive_policy(void)
621 {
622 	ipsec_policy_head_t *h = &inactive_policy;
623 	IPPH_REFHOLD(h);
624 	return (h);
625 }
626 
627 /*
628  * Lock inactive policy, then active policy, then exchange policy root
629  * pointers.
630  */
631 void
632 ipsec_swap_policy(void)
633 {
634 	int af, dir;
635 	avl_tree_t r1, r2;
636 
637 	rw_enter(&inactive_policy.iph_lock, RW_WRITER);
638 	rw_enter(&system_policy.iph_lock, RW_WRITER);
639 
640 	r1 = system_policy.iph_rulebyid;
641 	r2 = inactive_policy.iph_rulebyid;
642 	system_policy.iph_rulebyid = r2;
643 	inactive_policy.iph_rulebyid = r1;
644 
645 	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
646 		ipsec_policy_hash_t *h1, *h2;
647 
648 		h1 = system_policy.iph_root[dir].ipr_hash;
649 		h2 = inactive_policy.iph_root[dir].ipr_hash;
650 		system_policy.iph_root[dir].ipr_hash = h2;
651 		inactive_policy.iph_root[dir].ipr_hash = h1;
652 
653 		for (af = 0; af < IPSEC_NAF; af++) {
654 			ipsec_policy_t *t1, *t2;
655 
656 			t1 = system_policy.iph_root[dir].ipr_nonhash[af];
657 			t2 = inactive_policy.iph_root[dir].ipr_nonhash[af];
658 			system_policy.iph_root[dir].ipr_nonhash[af] = t2;
659 			inactive_policy.iph_root[dir].ipr_nonhash[af] = t1;
660 			if (t1 != NULL) {
661 				t1->ipsp_hash.hash_pp =
662 				    &(inactive_policy.iph_root[dir].
663 				    ipr_nonhash[af]);
664 			}
665 			if (t2 != NULL) {
666 				t2->ipsp_hash.hash_pp =
667 				    &(system_policy.iph_root[dir].
668 				    ipr_nonhash[af]);
669 			}
670 
671 		}
672 	}
673 	system_policy.iph_gen++;
674 	inactive_policy.iph_gen++;
675 	ipsec_update_present_flags();
676 	rw_exit(&system_policy.iph_lock);
677 	rw_exit(&inactive_policy.iph_lock);
678 }
679 
680 /*
681  * Clone one policy rule..
682  */
683 static ipsec_policy_t *
684 ipsec_copy_policy(const ipsec_policy_t *src)
685 {
686 	ipsec_policy_t *dst = kmem_cache_alloc(ipsec_pol_cache, KM_NOSLEEP);
687 
688 	if (dst == NULL)
689 		return (NULL);
690 
691 	/*
692 	 * Adjust refcounts of cloned state.
693 	 */
694 	IPACT_REFHOLD(src->ipsp_act);
695 	src->ipsp_sel->ipsl_refs++;
696 
697 	HASH_NULL(dst, ipsp_hash);
698 	dst->ipsp_refs = 1;
699 	dst->ipsp_sel = src->ipsp_sel;
700 	dst->ipsp_act = src->ipsp_act;
701 	dst->ipsp_prio = src->ipsp_prio;
702 	dst->ipsp_index = src->ipsp_index;
703 
704 	return (dst);
705 }
706 
707 void
708 ipsec_insert_always(avl_tree_t *tree, void *new_node)
709 {
710 	void *node;
711 	avl_index_t where;
712 
713 	node = avl_find(tree, new_node, &where);
714 	ASSERT(node == NULL);
715 	avl_insert(tree, new_node, where);
716 }
717 
718 
719 static int
720 ipsec_copy_chain(ipsec_policy_head_t *dph, ipsec_policy_t *src,
721     ipsec_policy_t **dstp)
722 {
723 	for (; src != NULL; src = src->ipsp_hash.hash_next) {
724 		ipsec_policy_t *dst = ipsec_copy_policy(src);
725 		if (dst == NULL)
726 			return (ENOMEM);
727 
728 		HASHLIST_INSERT(dst, ipsp_hash, *dstp);
729 		ipsec_insert_always(&dph->iph_rulebyid, dst);
730 	}
731 	return (0);
732 }
733 
734 
735 
736 /*
737  * Make one policy head look exactly like another.
738  *
739  * As with ipsec_swap_policy, we lock the destination policy head first, then
740  * the source policy head. Note that we only need to read-lock the source
741  * policy head as we are not changing it.
742  */
743 static int
744 ipsec_copy_polhead(ipsec_policy_head_t *sph, ipsec_policy_head_t *dph)
745 {
746 	int af, dir, chain, nchains;
747 
748 	rw_enter(&dph->iph_lock, RW_WRITER);
749 
750 	ipsec_polhead_flush(dph);
751 
752 	rw_enter(&sph->iph_lock, RW_READER);
753 
754 	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
755 		ipsec_policy_root_t *dpr = &dph->iph_root[dir];
756 		ipsec_policy_root_t *spr = &sph->iph_root[dir];
757 		nchains = dpr->ipr_nchains;
758 
759 		ASSERT(dpr->ipr_nchains == spr->ipr_nchains);
760 
761 		for (af = 0; af < IPSEC_NAF; af++) {
762 			if (ipsec_copy_chain(dph, spr->ipr_nonhash[af],
763 			    &dpr->ipr_nonhash[af]))
764 				goto abort_copy;
765 		}
766 
767 		for (chain = 0; chain < nchains; chain++) {
768 			if (ipsec_copy_chain(dph,
769 			    spr->ipr_hash[chain].hash_head,
770 			    &dpr->ipr_hash[chain].hash_head))
771 				goto abort_copy;
772 		}
773 	}
774 
775 	dph->iph_gen++;
776 
777 	rw_exit(&sph->iph_lock);
778 	rw_exit(&dph->iph_lock);
779 	return (0);
780 
781 abort_copy:
782 	ipsec_polhead_flush(dph);
783 	rw_exit(&sph->iph_lock);
784 	rw_exit(&dph->iph_lock);
785 	return (ENOMEM);
786 }
787 
788 /*
789  * Clone currently active policy to the inactive policy list.
790  */
791 int
792 ipsec_clone_system_policy(void)
793 {
794 	return (ipsec_copy_polhead(&system_policy, &inactive_policy));
795 }
796 
797 
798 /*
799  * Extract the string from ipsec_policy_failure_msgs[type] and
800  * log it.
801  *
802  * This function needs to be kept in synch with ipsec_rl_strlog() in
803  * sadb.c.
804  * XXX this function should be combined with the ipsec_rl_strlog() function.
805  */
806 void
807 ipsec_log_policy_failure(queue_t *q, int type, char *func_name, ipha_t *ipha,
808     ip6_t *ip6h, boolean_t secure)
809 {
810 	char	sbuf[INET6_ADDRSTRLEN];
811 	char	dbuf[INET6_ADDRSTRLEN];
812 	char	*s;
813 	char	*d;
814 	hrtime_t current = gethrtime();
815 
816 	ASSERT((ipha == NULL && ip6h != NULL) ||
817 	    (ip6h == NULL && ipha != NULL));
818 
819 	if (ipha != NULL) {
820 		s = inet_ntop(AF_INET, &ipha->ipha_src, sbuf, sizeof (sbuf));
821 		d = inet_ntop(AF_INET, &ipha->ipha_dst, dbuf, sizeof (dbuf));
822 	} else {
823 		s = inet_ntop(AF_INET6, &ip6h->ip6_src, sbuf, sizeof (sbuf));
824 		d = inet_ntop(AF_INET6, &ip6h->ip6_dst, dbuf, sizeof (dbuf));
825 
826 	}
827 
828 	/* Always bump the policy failure counter. */
829 	ipsec_policy_failure_count[type]++;
830 
831 	/* Convert interval (in msec) to hrtime (in nsec), which means * 10^6 */
832 	if (ipsec_policy_failure_last +
833 	    ((hrtime_t)ipsec_policy_log_interval * (hrtime_t)1000000) <=
834 	    current) {
835 		/*
836 		 * Throttle the logging such that I only log one message
837 		 * every 'ipsec_policy_log_interval' amount of time.
838 		 */
839 		(void) mi_strlog(q, 0, SL_ERROR|SL_WARN|SL_CONSOLE,
840 		    ipsec_policy_failure_msgs[type],
841 		    func_name,
842 		    (secure ? "secure" : "not secure"), s, d);
843 		ipsec_policy_failure_last = current;
844 	}
845 }
846 
847 void
848 ipsec_config_flush()
849 {
850 	rw_enter(&system_policy.iph_lock, RW_WRITER);
851 	ipsec_polhead_flush(&system_policy);
852 	ipsec_next_policy_index = 1;
853 	rw_exit(&system_policy.iph_lock);
854 	ipsec_action_reclaim(0);
855 }
856 
857 /*
858  * Clip a policy's min/max keybits vs. the capabilities of the
859  * algorithm.
860  */
861 static void
862 act_alg_adjust(uint_t algtype, uint_t algid,
863     uint16_t *minbits, uint16_t *maxbits)
864 {
865 	ipsec_alginfo_t *algp = ipsec_alglists[algtype][algid];
866 	if (algp != NULL) {
867 		/*
868 		 * If passed-in minbits is zero, we assume the caller trusts
869 		 * us with setting the minimum key size.  We pick the
870 		 * algorithms DEFAULT key size for the minimum in this case.
871 		 */
872 		if (*minbits == 0) {
873 			*minbits = algp->alg_default_bits;
874 			ASSERT(*minbits >= algp->alg_minbits);
875 		} else {
876 			*minbits = MAX(*minbits, algp->alg_minbits);
877 		}
878 		if (*maxbits == 0)
879 			*maxbits = algp->alg_maxbits;
880 		else
881 			*maxbits = MIN(*maxbits, algp->alg_maxbits);
882 		ASSERT(*minbits <= *maxbits);
883 	} else {
884 		*minbits = 0;
885 		*maxbits = 0;
886 	}
887 }
888 
889 /*
890  * Check an action's requested algorithms against the algorithms currently
891  * loaded in the system.
892  */
893 boolean_t
894 ipsec_check_action(ipsec_act_t *act, int *diag)
895 {
896 	ipsec_prot_t *ipp;
897 
898 	ipp = &act->ipa_apply;
899 
900 	if (ipp->ipp_use_ah &&
901 	    ipsec_alglists[IPSEC_ALG_AUTH][ipp->ipp_auth_alg] == NULL) {
902 		*diag = SPD_DIAGNOSTIC_UNSUPP_AH_ALG;
903 		return (B_FALSE);
904 	}
905 	if (ipp->ipp_use_espa &&
906 	    ipsec_alglists[IPSEC_ALG_AUTH][ipp->ipp_esp_auth_alg] == NULL) {
907 		*diag = SPD_DIAGNOSTIC_UNSUPP_ESP_AUTH_ALG;
908 		return (B_FALSE);
909 	}
910 	if (ipp->ipp_use_esp &&
911 	    ipsec_alglists[IPSEC_ALG_ENCR][ipp->ipp_encr_alg] == NULL) {
912 		*diag = SPD_DIAGNOSTIC_UNSUPP_ESP_ENCR_ALG;
913 		return (B_FALSE);
914 	}
915 
916 	act_alg_adjust(IPSEC_ALG_AUTH, ipp->ipp_auth_alg,
917 	    &ipp->ipp_ah_minbits, &ipp->ipp_ah_maxbits);
918 	act_alg_adjust(IPSEC_ALG_AUTH, ipp->ipp_esp_auth_alg,
919 	    &ipp->ipp_espa_minbits, &ipp->ipp_espa_maxbits);
920 	act_alg_adjust(IPSEC_ALG_ENCR, ipp->ipp_encr_alg,
921 	    &ipp->ipp_espe_minbits, &ipp->ipp_espe_maxbits);
922 
923 	if (ipp->ipp_ah_minbits > ipp->ipp_ah_maxbits) {
924 		*diag = SPD_DIAGNOSTIC_UNSUPP_AH_KEYSIZE;
925 		return (B_FALSE);
926 	}
927 	if (ipp->ipp_espa_minbits > ipp->ipp_espa_maxbits) {
928 		*diag = SPD_DIAGNOSTIC_UNSUPP_ESP_AUTH_KEYSIZE;
929 		return (B_FALSE);
930 	}
931 	if (ipp->ipp_espe_minbits > ipp->ipp_espe_maxbits) {
932 		*diag = SPD_DIAGNOSTIC_UNSUPP_ESP_ENCR_KEYSIZE;
933 		return (B_FALSE);
934 	}
935 	/* TODO: sanity check lifetimes */
936 	return (B_TRUE);
937 }
938 
939 /*
940  * Set up a single action during wildcard expansion..
941  */
942 static void
943 ipsec_setup_act(ipsec_act_t *outact, ipsec_act_t *act,
944     uint_t auth_alg, uint_t encr_alg, uint_t eauth_alg)
945 {
946 	ipsec_prot_t *ipp;
947 
948 	*outact = *act;
949 	ipp = &outact->ipa_apply;
950 	ipp->ipp_auth_alg = (uint8_t)auth_alg;
951 	ipp->ipp_encr_alg = (uint8_t)encr_alg;
952 	ipp->ipp_esp_auth_alg = (uint8_t)eauth_alg;
953 
954 	act_alg_adjust(IPSEC_ALG_AUTH, auth_alg,
955 	    &ipp->ipp_ah_minbits, &ipp->ipp_ah_maxbits);
956 	act_alg_adjust(IPSEC_ALG_AUTH, eauth_alg,
957 	    &ipp->ipp_espa_minbits, &ipp->ipp_espa_maxbits);
958 	act_alg_adjust(IPSEC_ALG_ENCR, encr_alg,
959 	    &ipp->ipp_espe_minbits, &ipp->ipp_espe_maxbits);
960 }
961 
/*
 * Combinatoric expansion time: expand a possibly wildcarded action into an
 * array of actions in which every algorithm is spelled out.
 *
 * An algorithm dimension (AH auth, ESP auth, ESP encr) counts as wildcarded
 * when the corresponding ipp_use_* flag is set and its algorithm id is 0.
 * Each wildcarded dimension is expanded over the registered algorithms of
 * that type, in sort list order, skipping the "none" algorithm.
 *
 * Returns the exploded action list, allocated with kmem_alloc(KM_NOSLEEP),
 * and stores the number of entries in *nact (output only).  Returns NULL if
 * the allocation fails.  Actions whose type is not IPSEC_ACT_APPLY are
 * simply copied into a one-element array (*nact is set to 1 in that case
 * even when the allocation fails).
 *
 * NOTE(review): ipsec_nalgs/ipsec_sortlist are read here without any lock
 * being taken in this function -- presumably the caller serializes against
 * algorithm registration; confirm.
 */
static ipsec_act_t *
ipsec_act_wildcard_expand(ipsec_act_t *act, uint_t *nact)
{
	boolean_t use_ah, use_esp, use_espa;
	boolean_t wild_auth, wild_encr, wild_eauth;
	uint_t	auth_alg, auth_idx, auth_min, auth_max;
	uint_t	eauth_alg, eauth_idx, eauth_min, eauth_max;
	uint_t  encr_alg, encr_idx, encr_min, encr_max;
	uint_t	action_count, ai;
	ipsec_act_t *outact;

	/* Only "apply" actions carry algorithms; anything else is copied. */
	if (act->ipa_type != IPSEC_ACT_APPLY) {
		outact = kmem_alloc(sizeof (*act), KM_NOSLEEP);
		*nact = 1;
		if (outact != NULL)
			bcopy(act, outact, sizeof (*act));
		return (outact);
	}
	/*
	 * compute the combinatoric explosion..
	 *
	 * we assume a request for encr if esp_req is PREF_REQUIRED
	 * we assume a request for ah auth if ah_req is PREF_REQUIRED.
	 * we assume a request for esp auth if !ah and esp_req is PREF_REQUIRED
	 */

	use_ah = act->ipa_apply.ipp_use_ah;
	use_esp = act->ipa_apply.ipp_use_esp;
	use_espa = act->ipa_apply.ipp_use_espa;
	auth_alg = act->ipa_apply.ipp_auth_alg;
	eauth_alg = act->ipa_apply.ipp_esp_auth_alg;
	encr_alg = act->ipa_apply.ipp_encr_alg;

	/* A dimension is wild when it is in use but names algorithm 0. */
	wild_auth = use_ah && (auth_alg == 0);
	wild_eauth = use_espa && (eauth_alg == 0);
	wild_encr = use_esp && (encr_alg == 0);

	/*
	 * Defaults for non-wildcarded dimensions: a one-element range whose
	 * single value is the algorithm id itself.
	 */
	action_count = 1;
	auth_min = auth_max = auth_alg;
	eauth_min = eauth_max = eauth_alg;
	encr_min = encr_max = encr_alg;

	/*
	 * set up for explosion.. for each dimension, expand output
	 * size by the explosion factor.
	 *
	 * Don't include the "any" algorithms, if defined, as no
	 * kernel policies should be set for these algorithms.
	 *
	 * For a wildcarded dimension, min/max become a range of sort list
	 * indices (0 .. ipsec_nalgs[type] - 1) rather than algorithm ids,
	 * and the multiplier excludes the `alg' entry when it is registered.
	 */

#define	SET_EXP_MINMAX(type, wild, alg, min, max) if (wild) {	\
		int nalgs = ipsec_nalgs[type];			\
		if (ipsec_alglists[type][alg] != NULL)		\
			nalgs--;				\
		action_count *= nalgs;				\
		min = 0;					\
		max = ipsec_nalgs[type] - 1;			\
	}

	SET_EXP_MINMAX(IPSEC_ALG_AUTH, wild_auth, SADB_AALG_NONE,
	    auth_min, auth_max);
	SET_EXP_MINMAX(IPSEC_ALG_AUTH, wild_eauth, SADB_AALG_NONE,
	    eauth_min, eauth_max);
	SET_EXP_MINMAX(IPSEC_ALG_ENCR, wild_encr, SADB_EALG_NONE,
	    encr_min, encr_max);

#undef	SET_EXP_MINMAX

	/*
	 * ok, allocate the whole mess..
	 */

	outact = kmem_alloc(sizeof (*outact) * action_count, KM_NOSLEEP);
	if (outact == NULL)
		return (NULL);

	/*
	 * Now compute all combinations.  Note that non-wildcarded
	 * dimensions just get a single value from auth_min, while
	 * wildcarded dimensions indirect through the sortlist.
	 *
	 * We do encryption outermost since, at this time, there's
	 * greater difference in security and performance between
	 * encryption algorithms vs. authentication algorithms.
	 */

	ai = 0;

	/* Map a loop index to an algorithm id (see comment above). */
#define	WHICH_ALG(type, wild, idx) ((wild)?(ipsec_sortlist[type][idx]):(idx))

	for (encr_idx = encr_min; encr_idx <= encr_max; encr_idx++) {
		encr_alg = WHICH_ALG(IPSEC_ALG_ENCR, wild_encr, encr_idx);
		/* Skip the "none" entry that SET_EXP_MINMAX left uncounted. */
		if (wild_encr && encr_alg == SADB_EALG_NONE)
			continue;
		for (auth_idx = auth_min; auth_idx <= auth_max; auth_idx++) {
			auth_alg = WHICH_ALG(IPSEC_ALG_AUTH, wild_auth,
			    auth_idx);
			if (wild_auth && auth_alg == SADB_AALG_NONE)
				continue;
			for (eauth_idx = eauth_min; eauth_idx <= eauth_max;
			    eauth_idx++) {
				eauth_alg = WHICH_ALG(IPSEC_ALG_AUTH,
				    wild_eauth, eauth_idx);
				if (wild_eauth && eauth_alg == SADB_AALG_NONE)
					continue;

				ipsec_setup_act(&outact[ai], act,
				    auth_alg, encr_alg, eauth_alg);
				ai++;
			}
		}
	}

#undef WHICH_ALG

	/* Every slot that was allocated must have been filled in. */
	ASSERT(ai == action_count);
	*nact = action_count;
	return (outact);
}
1086 
1087 /*
1088  * Extract the parts of an ipsec_prot_t from an old-style ipsec_req_t.
1089  */
1090 static void
1091 ipsec_prot_from_req(ipsec_req_t *req, ipsec_prot_t *ipp)
1092 {
1093 	bzero(ipp, sizeof (*ipp));
1094 	/*
1095 	 * ipp_use_* are bitfields.  Look at "!!" in the following as a
1096 	 * "boolean canonicalization" operator.
1097 	 */
1098 	ipp->ipp_use_ah = !!(req->ipsr_ah_req & IPSEC_PREF_REQUIRED);
1099 	ipp->ipp_use_esp = !!(req->ipsr_esp_req & IPSEC_PREF_REQUIRED);
1100 	ipp->ipp_use_espa = !!(req->ipsr_esp_auth_alg) || !ipp->ipp_use_ah;
1101 	ipp->ipp_use_se = !!(req->ipsr_self_encap_req & IPSEC_PREF_REQUIRED);
1102 	ipp->ipp_use_unique = !!((req->ipsr_ah_req|req->ipsr_esp_req) &
1103 	    IPSEC_PREF_UNIQUE);
1104 	ipp->ipp_encr_alg = req->ipsr_esp_alg;
1105 	ipp->ipp_auth_alg = req->ipsr_auth_alg;
1106 	ipp->ipp_esp_auth_alg = req->ipsr_esp_auth_alg;
1107 }
1108 
1109 /*
1110  * Extract a new-style action from a request.
1111  */
1112 void
1113 ipsec_actvec_from_req(ipsec_req_t *req, ipsec_act_t **actp, uint_t *nactp)
1114 {
1115 	struct ipsec_act act;
1116 	bzero(&act, sizeof (act));
1117 	if ((req->ipsr_ah_req & IPSEC_PREF_NEVER) &&
1118 	    (req->ipsr_esp_req & IPSEC_PREF_NEVER)) {
1119 		act.ipa_type = IPSEC_ACT_BYPASS;
1120 	} else {
1121 		act.ipa_type = IPSEC_ACT_APPLY;
1122 		ipsec_prot_from_req(req, &act.ipa_apply);
1123 	}
1124 	*actp = ipsec_act_wildcard_expand(&act, nactp);
1125 }
1126 
1127 /*
1128  * Convert a new-style "prot" back to an ipsec_req_t (more backwards compat).
1129  * We assume caller has already zero'ed *req for us.
1130  */
1131 static int
1132 ipsec_req_from_prot(ipsec_prot_t *ipp, ipsec_req_t *req)
1133 {
1134 	req->ipsr_esp_alg = ipp->ipp_encr_alg;
1135 	req->ipsr_auth_alg = ipp->ipp_auth_alg;
1136 	req->ipsr_esp_auth_alg = ipp->ipp_esp_auth_alg;
1137 
1138 	if (ipp->ipp_use_unique) {
1139 		req->ipsr_ah_req |= IPSEC_PREF_UNIQUE;
1140 		req->ipsr_esp_req |= IPSEC_PREF_UNIQUE;
1141 	}
1142 	if (ipp->ipp_use_se)
1143 		req->ipsr_self_encap_req |= IPSEC_PREF_REQUIRED;
1144 	if (ipp->ipp_use_ah)
1145 		req->ipsr_ah_req |= IPSEC_PREF_REQUIRED;
1146 	if (ipp->ipp_use_esp)
1147 		req->ipsr_esp_req |= IPSEC_PREF_REQUIRED;
1148 	return (sizeof (*req));
1149 }
1150 
1151 /*
1152  * Convert a new-style action back to an ipsec_req_t (more backwards compat).
1153  * We assume caller has already zero'ed *req for us.
1154  */
1155 static int
1156 ipsec_req_from_act(ipsec_action_t *ap, ipsec_req_t *req)
1157 {
1158 	switch (ap->ipa_act.ipa_type) {
1159 	case IPSEC_ACT_BYPASS:
1160 		req->ipsr_ah_req = IPSEC_PREF_NEVER;
1161 		req->ipsr_esp_req = IPSEC_PREF_NEVER;
1162 		return (sizeof (*req));
1163 	case IPSEC_ACT_APPLY:
1164 		return (ipsec_req_from_prot(&ap->ipa_act.ipa_apply, req));
1165 	}
1166 	return (sizeof (*req));
1167 }
1168 
1169 /*
1170  * Convert a new-style action back to an ipsec_req_t (more backwards compat).
1171  * We assume caller has already zero'ed *req for us.
1172  */
1173 static int
1174 ipsec_req_from_head(ipsec_policy_head_t *ph, ipsec_req_t *req, int af)
1175 {
1176 	ipsec_policy_t *p;
1177 
1178 	/*
1179 	 * FULL-PERSOCK: consult hash table, too?
1180 	 */
1181 	for (p = ph->iph_root[IPSEC_INBOUND].ipr_nonhash[af];
1182 	    p != NULL;
1183 	    p = p->ipsp_hash.hash_next) {
1184 		if ((p->ipsp_sel->ipsl_key.ipsl_valid&IPSL_WILDCARD) == 0)
1185 			return (ipsec_req_from_act(p->ipsp_act, req));
1186 	}
1187 	return (sizeof (*req));
1188 }
1189 
1190 /*
1191  * Based on per-socket or latched policy, convert to an appropriate
1192  * IP_SEC_OPT ipsec_req_t for the socket option; return size so we can
1193  * be tail-called from ip.
1194  */
1195 int
1196 ipsec_req_from_conn(conn_t *connp, ipsec_req_t *req, int af)
1197 {
1198 	ipsec_latch_t *ipl;
1199 	int rv = sizeof (ipsec_req_t);
1200 
1201 	bzero(req, sizeof (*req));
1202 
1203 	mutex_enter(&connp->conn_lock);
1204 	ipl = connp->conn_latch;
1205 
1206 	/*
1207 	 * Find appropriate policy.  First choice is latched action;
1208 	 * failing that, see latched policy; failing that,
1209 	 * look at configured policy.
1210 	 */
1211 	if (ipl != NULL) {
1212 		if (ipl->ipl_in_action != NULL) {
1213 			rv = ipsec_req_from_act(ipl->ipl_in_action, req);
1214 			goto done;
1215 		}
1216 		if (ipl->ipl_in_policy != NULL) {
1217 			rv = ipsec_req_from_act(ipl->ipl_in_policy->ipsp_act,
1218 			    req);
1219 			goto done;
1220 		}
1221 	}
1222 	if (connp->conn_policy != NULL)
1223 		rv = ipsec_req_from_head(connp->conn_policy, req, af);
1224 done:
1225 	mutex_exit(&connp->conn_lock);
1226 	return (rv);
1227 }
1228 
1229 void
1230 ipsec_actvec_free(ipsec_act_t *act, uint_t nact)
1231 {
1232 	kmem_free(act, nact * sizeof (*act));
1233 }
1234 
1235 /*
1236  * When outbound policy is not cached, look it up the hard way and attach
1237  * an ipsec_out_t to the packet..
1238  */
1239 static mblk_t *
1240 ipsec_attach_global_policy(mblk_t *mp, conn_t *connp, ipsec_selector_t *sel)
1241 {
1242 	ipsec_policy_t *p;
1243 
1244 	p = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, NULL, sel);
1245 
1246 	if (p == NULL)
1247 		return (NULL);
1248 	return (ipsec_attach_ipsec_out(mp, connp, p, sel->ips_protocol));
1249 }
1250 
1251 /*
1252  * We have an ipsec_out already, but don't have cached policy; fill it in
1253  * with the right actions.
1254  */
1255 static mblk_t *
1256 ipsec_apply_global_policy(mblk_t *ipsec_mp, conn_t *connp,
1257     ipsec_selector_t *sel)
1258 {
1259 	ipsec_out_t *io;
1260 	ipsec_policy_t *p;
1261 
1262 	ASSERT(ipsec_mp->b_datap->db_type == M_CTL);
1263 	ASSERT(ipsec_mp->b_cont->b_datap->db_type == M_DATA);
1264 
1265 	io = (ipsec_out_t *)ipsec_mp->b_rptr;
1266 
1267 	if (io->ipsec_out_policy == NULL) {
1268 		p = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, io, sel);
1269 		io->ipsec_out_policy = p;
1270 	}
1271 	return (ipsec_mp);
1272 }
1273 
1274 
1275 /* ARGSUSED */
1276 /*
1277  * Consumes a reference to ipsp.
1278  */
1279 static mblk_t *
1280 ipsec_check_loopback_policy(queue_t *q, mblk_t *first_mp,
1281     boolean_t mctl_present, ipsec_policy_t *ipsp)
1282 {
1283 	mblk_t *ipsec_mp;
1284 	ipsec_in_t *ii;
1285 
1286 	if (!mctl_present)
1287 		return (first_mp);
1288 
1289 	ipsec_mp = first_mp;
1290 
1291 	ii = (ipsec_in_t *)ipsec_mp->b_rptr;
1292 	ASSERT(ii->ipsec_in_loopback);
1293 	IPPOL_REFRELE(ipsp);
1294 
1295 	/*
1296 	 * We should do an actual policy check here.  Revisit this
1297 	 * when we revisit the IPsec API.
1298 	 */
1299 
1300 	return (first_mp);
1301 }
1302 
1303 /*
1304  * Check that packet's inbound ports & proto match the selectors
1305  * expected by the SAs it traversed on the way in.
1306  */
1307 static boolean_t
1308 ipsec_check_ipsecin_unique(ipsec_in_t *ii, mblk_t *mp,
1309     ipha_t *ipha, ip6_t *ip6h,
1310     const char **reason, kstat_named_t **counter)
1311 {
1312 	uint64_t pkt_unique, ah_mask, esp_mask;
1313 	ipsa_t *ah_assoc = ii->ipsec_in_ah_sa;
1314 	ipsa_t *esp_assoc = ii->ipsec_in_esp_sa;
1315 	ipsec_selector_t sel;
1316 
1317 	ASSERT((ah_assoc != NULL) || (esp_assoc != NULL));
1318 
1319 	ah_mask = (ah_assoc != NULL) ? ah_assoc->ipsa_unique_mask : 0;
1320 	esp_mask = (esp_assoc != NULL) ? esp_assoc->ipsa_unique_mask : 0;
1321 
1322 	if ((ah_mask == 0) && (esp_mask == 0))
1323 		return (B_TRUE);
1324 
1325 	if (!ipsec_init_inbound_sel(&sel, mp, ipha, ip6h)) {
1326 		/*
1327 		 * Technically not a policy mismatch, but it is
1328 		 * an internal failure.
1329 		 */
1330 		*reason = "ipsec_init_inbound_sel";
1331 		*counter = &ipdrops_spd_nomem;
1332 		return (B_FALSE);
1333 	}
1334 
1335 	pkt_unique = SA_UNIQUE_ID(sel.ips_remote_port, sel.ips_local_port,
1336 	    sel.ips_protocol);
1337 
1338 	if (ah_mask != 0) {
1339 		if (ah_assoc->ipsa_unique_id != (pkt_unique & ah_mask)) {
1340 			*reason = "AH inner header mismatch";
1341 			*counter = &ipdrops_spd_ah_innermismatch;
1342 			return (B_FALSE);
1343 		}
1344 	}
1345 	if (esp_mask != 0) {
1346 		if (esp_assoc->ipsa_unique_id != (pkt_unique & esp_mask)) {
1347 			*reason = "ESP inner header mismatch";
1348 			*counter = &ipdrops_spd_esp_innermismatch;
1349 			return (B_FALSE);
1350 		}
1351 	}
1352 	return (B_TRUE);
1353 }
1354 
/*
 * Check whether the protection an inbound packet actually received (as
 * recorded in the ipsec_in_t "ii") satisfies a single policy action "ap".
 *
 * Exactly one of ipha / ip6h must be non-NULL.
 *
 * Returns B_TRUE if the packet is acceptable under this action, B_FALSE
 * otherwise.  Whenever B_FALSE is returned, *counter and *reason have been
 * set to describe the failure.  Note that *counter and *reason are also
 * written on some paths that end up returning B_TRUE (the "accept in
 * clear" fallbacks below), so callers should only consult them on failure.
 *
 * When every check for an IPSEC_ACT_APPLY action passes, a hold is taken
 * on ap and it is recorded in ii->ipsec_in_action.  The early "accept in
 * clear" exits do not record the action.
 */
static boolean_t
ipsec_check_ipsecin_action(ipsec_in_t *ii, mblk_t *mp, ipsec_action_t *ap,
    ipha_t *ipha, ip6_t *ip6h, const char **reason, kstat_named_t **counter)
{
	boolean_t ret = B_TRUE;
	ipsec_prot_t *ipp;
	ipsa_t *ah_assoc;
	ipsa_t *esp_assoc;
	boolean_t decaps;

	ASSERT((ipha == NULL && ip6h != NULL) ||
	    (ip6h == NULL && ipha != NULL));

	if (ii->ipsec_in_loopback) {
		/*
		 * Besides accepting pointer-equivalent actions, we also
		 * accept any ICMP errors we generated for ourselves,
		 * regardless of policy.  If we do not wish to make this
		 * assumption in the future, check here, and where
		 * icmp_loopback is initialized in ip.c and ip6.c.  (Look for
		 * ipsec_out_icmp_loopback.)
		 */
		if (ap == ii->ipsec_in_action || ii->ipsec_in_icmp_loopback)
			return (B_TRUE);

		/* Deep compare necessary here?? */
		*counter = &ipdrops_spd_loopback_mismatch;
		*reason = "loopback policy mismatch";
		return (B_FALSE);
	}
	ASSERT(!ii->ipsec_in_icmp_loopback);

	/* SAs the packet arrived on (either may be NULL). */
	ah_assoc = ii->ipsec_in_ah_sa;
	esp_assoc = ii->ipsec_in_esp_sa;

	decaps = ii->ipsec_in_decaps;

	switch (ap->ipa_act.ipa_type) {
	case IPSEC_ACT_DISCARD:
	case IPSEC_ACT_REJECT:
		/* Should "fail hard" */
		*counter = &ipdrops_spd_explicit;
		*reason = "blocked by policy";
		return (B_FALSE);

	case IPSEC_ACT_BYPASS:
	case IPSEC_ACT_CLEAR:
		/* The action wants cleartext, but the packet was protected. */
		*counter = &ipdrops_spd_got_secure;
		*reason = "expected clear, got protected";
		return (B_FALSE);

	case IPSEC_ACT_APPLY:
		ipp = &ap->ipa_act.ipa_apply;
		/*
		 * As of now we do the simple checks of whether
		 * the datagram has gone through the required IPSEC
		 * protocol constraints or not. We might have more
		 * in the future like sensitive levels, key bits, etc.
		 * If it fails the constraints, check whether we would
		 * have accepted this if it had come in clear.
		 */
		if (ipp->ipp_use_ah) {
			if (ah_assoc == NULL) {
				/*
				 * AH required but absent: the verdict is
				 * whatever the accept-in-clear check says.
				 * counter/reason are set unconditionally and
				 * only matter if ret is B_FALSE.
				 */
				ret = ipsec_inbound_accept_clear(mp, ipha,
				    ip6h);
				*counter = &ipdrops_spd_got_clear;
				*reason = "unprotected not accepted";
				break;
			}
			ASSERT(ah_assoc != NULL);
			ASSERT(ipp->ipp_auth_alg != 0);

			if (ah_assoc->ipsa_auth_alg !=
			    ipp->ipp_auth_alg) {
				*counter = &ipdrops_spd_bad_ahalg;
				*reason = "unacceptable ah alg";
				ret = B_FALSE;
				break;
			}
		} else if (ah_assoc != NULL) {
			/*
			 * Don't allow this. Check IPSEC NOTE above
			 * ip_fanout_proto().
			 */
			*counter = &ipdrops_spd_got_ah;
			*reason = "unexpected AH";
			ret = B_FALSE;
			break;
		}
		if (ipp->ipp_use_esp) {
			if (esp_assoc == NULL) {
				/*
				 * ESP required but absent: same fallback as
				 * the AH case above.
				 */
				ret = ipsec_inbound_accept_clear(mp, ipha,
				    ip6h);
				*counter = &ipdrops_spd_got_clear;
				*reason = "unprotected not accepted";
				break;
			}
			ASSERT(esp_assoc != NULL);
			ASSERT(ipp->ipp_encr_alg != 0);

			if (esp_assoc->ipsa_encr_alg !=
			    ipp->ipp_encr_alg) {
				*counter = &ipdrops_spd_bad_espealg;
				*reason = "unacceptable esp alg";
				ret = B_FALSE;
				break;
			}
			/*
			 * If the client does not need authentication,
			 * we don't verify the algorithm.
			 */
			if (ipp->ipp_use_espa) {
				if (esp_assoc->ipsa_auth_alg !=
				    ipp->ipp_esp_auth_alg) {
					*counter = &ipdrops_spd_bad_espaalg;
					*reason = "unacceptable esp auth alg";
					ret = B_FALSE;
					break;
				}
			}
		} else if (esp_assoc != NULL) {
			/*
			 * Don't allow this. Check IPSEC NOTE above
			 * ip_fanout_proto().
			 */
			*counter = &ipdrops_spd_got_esp;
			*reason = "unexpected ESP";
			ret = B_FALSE;
			break;
		}
		if (ipp->ipp_use_se) {
			if (!decaps) {
				/*
				 * Self-encapsulation required but the packet
				 * was not decapsulated.  Unlike the AH/ESP
				 * cases, an "accept in clear" verdict here
				 * does not leave the switch; we go on to
				 * record the action below.
				 */
				ret = ipsec_inbound_accept_clear(mp, ipha,
				    ip6h);
				if (!ret) {
					/* XXX mutant? */
					*counter = &ipdrops_spd_bad_selfencap;
					*reason = "self encap not found";
					break;
				}
			}
		} else if (decaps) {
			/*
			 * XXX If the packet comes in tunneled and the
			 * recipient does not expect it to be tunneled, it
			 * is okay. But we drop to be consistent with the
			 * other cases.
			 */
			*counter = &ipdrops_spd_got_selfencap;
			*reason = "unexpected self encap";
			ret = B_FALSE;
			break;
		}
		if (ii->ipsec_in_action != NULL) {
			/*
			 * This can happen if we do a double policy-check on
			 * a packet
			 * XXX XXX should fix this case!
			 */
			IPACT_REFRELE(ii->ipsec_in_action);
		}
		/*
		 * NOTE(review): this ASSERT can only hold after the release
		 * above if IPACT_REFRELE() also clears its argument --
		 * confirm against the macro definition.
		 */
		ASSERT(ii->ipsec_in_action == NULL);
		/* Remember which action the packet was accepted under. */
		IPACT_REFHOLD(ap);
		ii->ipsec_in_action = ap;
		break;	/* from switch */
	}
	return (ret);
}
1523 
1524 static boolean_t
1525 spd_match_inbound_ids(ipsec_latch_t *ipl, ipsa_t *sa)
1526 {
1527 	ASSERT(ipl->ipl_ids_latched == B_TRUE);
1528 	return ipsid_equal(ipl->ipl_remote_cid, sa->ipsa_src_cid) &&
1529 	    ipsid_equal(ipl->ipl_local_cid, sa->ipsa_dst_cid);
1530 }
1531 
1532 /*
1533  * Called to check policy on a latched connection, both from this file
1534  * and from tcp.c
1535  */
1536 boolean_t
1537 ipsec_check_ipsecin_latch(ipsec_in_t *ii, mblk_t *mp, ipsec_latch_t *ipl,
1538     ipha_t *ipha, ip6_t *ip6h, const char **reason, kstat_named_t **counter)
1539 {
1540 	ASSERT(ipl->ipl_ids_latched == B_TRUE);
1541 
1542 	if ((ii->ipsec_in_ah_sa != NULL) &&
1543 	    (!spd_match_inbound_ids(ipl, ii->ipsec_in_ah_sa))) {
1544 		*counter = &ipdrops_spd_ah_badid;
1545 		*reason = "AH identity mismatch";
1546 		return (B_FALSE);
1547 	}
1548 
1549 	if ((ii->ipsec_in_esp_sa != NULL) &&
1550 	    (!spd_match_inbound_ids(ipl, ii->ipsec_in_esp_sa))) {
1551 		*counter = &ipdrops_spd_esp_badid;
1552 		*reason = "ESP identity mismatch";
1553 		return (B_FALSE);
1554 	}
1555 
1556 	if (!ipsec_check_ipsecin_unique(ii, mp, ipha, ip6h, reason, counter))
1557 		return (B_FALSE);
1558 
1559 	return (ipsec_check_ipsecin_action(ii, mp, ipl->ipl_in_action,
1560 	    ipha, ip6h, reason, counter));
1561 }
1562 
1563 /*
1564  * Check to see whether this secured datagram meets the policy
1565  * constraints specified in ipsp.
1566  *
1567  * Called from ipsec_check_global_policy, and ipsec_check_inbound_policy.
1568  *
1569  * Consumes a reference to ipsp.
1570  */
1571 static mblk_t *
1572 ipsec_check_ipsecin_policy(queue_t *q, mblk_t *first_mp, ipsec_policy_t *ipsp,
1573     ipha_t *ipha, ip6_t *ip6h)
1574 {
1575 	ipsec_in_t *ii;
1576 	ipsec_action_t *ap;
1577 	const char *reason = "no policy actions found";
1578 	mblk_t *data_mp, *ipsec_mp;
1579 	kstat_named_t *counter = &ipdrops_spd_got_secure;
1580 
1581 	data_mp = first_mp->b_cont;
1582 	ipsec_mp = first_mp;
1583 
1584 	ASSERT(ipsp != NULL);
1585 
1586 	ASSERT((ipha == NULL && ip6h != NULL) ||
1587 	    (ip6h == NULL && ipha != NULL));
1588 
1589 	ii = (ipsec_in_t *)ipsec_mp->b_rptr;
1590 
1591 	if (ii->ipsec_in_loopback)
1592 		return (ipsec_check_loopback_policy(q, first_mp, B_TRUE, ipsp));
1593 
1594 	ASSERT(ii->ipsec_in_type == IPSEC_IN);
1595 	if (ii->ipsec_in_action != NULL) {
1596 		/*
1597 		 * this can happen if we do a double policy-check on a packet
1598 		 * Would be nice to be able to delete this test..
1599 		 */
1600 		IPACT_REFRELE(ii->ipsec_in_action);
1601 	}
1602 	ASSERT(ii->ipsec_in_action == NULL);
1603 
1604 	if (!SA_IDS_MATCH(ii->ipsec_in_ah_sa, ii->ipsec_in_esp_sa)) {
1605 		reason = "inbound AH and ESP identities differ";
1606 		counter = &ipdrops_spd_ahesp_diffid;
1607 		goto drop;
1608 	}
1609 
1610 	if (!ipsec_check_ipsecin_unique(ii, data_mp, ipha, ip6h,
1611 	    &reason, &counter))
1612 		goto drop;
1613 
1614 	/*
1615 	 * Ok, now loop through the possible actions and see if any
1616 	 * of them work for us.
1617 	 */
1618 
1619 	for (ap = ipsp->ipsp_act; ap != NULL; ap = ap->ipa_next) {
1620 		if (ipsec_check_ipsecin_action(ii, data_mp, ap,
1621 		    ipha, ip6h, &reason, &counter)) {
1622 			BUMP_MIB(&ip_mib, ipsecInSucceeded);
1623 			IPPOL_REFRELE(ipsp);
1624 			return (first_mp);
1625 		}
1626 	}
1627 drop:
1628 	(void) mi_strlog(q, 0, SL_ERROR|SL_WARN|SL_CONSOLE,
1629 	    "ipsec inbound policy mismatch: %s, packet dropped\n",
1630 	    reason);
1631 	IPPOL_REFRELE(ipsp);
1632 	ASSERT(ii->ipsec_in_action == NULL);
1633 	BUMP_MIB(&ip_mib, ipsecInFailed);
1634 	ip_drop_packet(first_mp, B_TRUE, NULL, NULL, counter, &spd_dropper);
1635 	return (NULL);
1636 }
1637 
1638 /*
1639  * sleazy prefix-length-based compare.
1640  * another inlining candidate..
1641  */
1642 static boolean_t
1643 ip_addr_match(uint8_t *addr1, int pfxlen, in6_addr_t *addr2p)
1644 {
1645 	int offset = pfxlen>>3;
1646 	int bitsleft = pfxlen & 7;
1647 	uint8_t *addr2 = (uint8_t *)addr2p;
1648 
1649 	/*
1650 	 * and there was much evil..
1651 	 * XXX should inline-expand the bcmp here and do this 32 bits
1652 	 * or 64 bits at a time..
1653 	 */
1654 	return ((bcmp(addr1, addr2, offset) == 0) &&
1655 	    ((bitsleft == 0) ||
1656 		(((addr1[offset] ^ addr2[offset]) &
1657 		    (0xff<<(8-bitsleft))) == 0)));
1658 }
1659 
1660 static ipsec_policy_t *
1661 ipsec_find_policy_chain(ipsec_policy_t *best, ipsec_policy_t *chain,
1662     ipsec_selector_t *sel, boolean_t is_icmp_inv_acq)
1663 {
1664 	ipsec_selkey_t *isel;
1665 	ipsec_policy_t *p;
1666 	int bpri = best ? best->ipsp_prio : 0;
1667 
1668 	for (p = chain; p != NULL; p = p->ipsp_hash.hash_next) {
1669 		uint32_t valid;
1670 
1671 		if (p->ipsp_prio <= bpri)
1672 			continue;
1673 		isel = &p->ipsp_sel->ipsl_key;
1674 		valid = isel->ipsl_valid;
1675 
1676 		if ((valid & IPSL_PROTOCOL) &&
1677 		    (isel->ipsl_proto != sel->ips_protocol))
1678 			continue;
1679 
1680 		if ((valid & IPSL_REMOTE_ADDR) &&
1681 		    !ip_addr_match((uint8_t *)&isel->ipsl_remote,
1682 			isel->ipsl_remote_pfxlen,
1683 			&sel->ips_remote_addr_v6))
1684 			continue;
1685 
1686 		if ((valid & IPSL_LOCAL_ADDR) &&
1687 		    !ip_addr_match((uint8_t *)&isel->ipsl_local,
1688 			isel->ipsl_local_pfxlen,
1689 			&sel->ips_local_addr_v6))
1690 			continue;
1691 
1692 		if ((valid & IPSL_REMOTE_PORT) &&
1693 		    isel->ipsl_rport != sel->ips_remote_port)
1694 			continue;
1695 
1696 		if ((valid & IPSL_LOCAL_PORT) &&
1697 		    isel->ipsl_lport != sel->ips_local_port)
1698 			continue;
1699 
1700 		if (!is_icmp_inv_acq) {
1701 			if ((valid & IPSL_ICMP_TYPE) &&
1702 			    (isel->ipsl_icmp_type > sel->ips_icmp_type ||
1703 			    isel->ipsl_icmp_type_end < sel->ips_icmp_type)) {
1704 				continue;
1705 			}
1706 
1707 			if ((valid & IPSL_ICMP_CODE) &&
1708 			    (isel->ipsl_icmp_code > sel->ips_icmp_code ||
1709 			    isel->ipsl_icmp_code_end <
1710 			    sel->ips_icmp_code)) {
1711 				continue;
1712 			}
1713 		} else {
1714 			/*
1715 			 * special case for icmp inverse acquire
1716 			 * we only want policies that aren't drop/pass
1717 			 */
1718 			if (p->ipsp_act->ipa_act.ipa_type != IPSEC_ACT_APPLY)
1719 				continue;
1720 		}
1721 
1722 		/* we matched all the packet-port-field selectors! */
1723 		best = p;
1724 		bpri = p->ipsp_prio;
1725 	}
1726 
1727 	return (best);
1728 }
1729 
1730 /*
1731  * Try to find and return the best policy entry under a given policy
1732  * root for a given set of selectors; the first parameter "best" is
1733  * the current best policy so far.  If "best" is non-null, we have a
1734  * reference to it.  We return a reference to a policy; if that policy
1735  * is not the original "best", we need to release that reference
1736  * before returning.
1737  */
1738 static ipsec_policy_t *
1739 ipsec_find_policy_head(ipsec_policy_t *best,
1740     ipsec_policy_head_t *head, int direction, ipsec_selector_t *sel,
1741     int selhash)
1742 {
1743 	ipsec_policy_t *curbest;
1744 	ipsec_policy_root_t *root;
1745 	uint8_t is_icmp_inv_acq = sel->ips_is_icmp_inv_acq;
1746 	int af = sel->ips_isv4 ? IPSEC_AF_V4 : IPSEC_AF_V6;
1747 
1748 	curbest = best;
1749 	root = &head->iph_root[direction];
1750 
1751 #ifdef DEBUG
1752 	if (is_icmp_inv_acq) {
1753 		if (sel->ips_isv4) {
1754 			if (sel->ips_protocol != IPPROTO_ICMP) {
1755 			    cmn_err(CE_WARN, "ipsec_find_policy_head:"
1756 			    " expecting icmp, got %d", sel->ips_protocol);
1757 			}
1758 		} else {
1759 			if (sel->ips_protocol != IPPROTO_ICMPV6) {
1760 				cmn_err(CE_WARN, "ipsec_find_policy_head:"
1761 				" expecting icmpv6, got %d", sel->ips_protocol);
1762 			}
1763 		}
1764 	}
1765 #endif
1766 
1767 	rw_enter(&head->iph_lock, RW_READER);
1768 
1769 	if (root->ipr_nchains > 0) {
1770 		curbest = ipsec_find_policy_chain(curbest,
1771 		    root->ipr_hash[selhash].hash_head, sel, is_icmp_inv_acq);
1772 	}
1773 	curbest = ipsec_find_policy_chain(curbest, root->ipr_nonhash[af], sel,
1774 	    is_icmp_inv_acq);
1775 
1776 	/*
1777 	 * Adjust reference counts if we found anything new.
1778 	 */
1779 	if (curbest != best) {
1780 		ASSERT(curbest != NULL);
1781 		IPPOL_REFHOLD(curbest);
1782 
1783 		if (best != NULL) {
1784 			IPPOL_REFRELE(best);
1785 		}
1786 	}
1787 
1788 	rw_exit(&head->iph_lock);
1789 
1790 	return (curbest);
1791 }
1792 
1793 /*
1794  * Find the best system policy (either global or per-interface) which
1795  * applies to the given selector; look in all the relevant policy roots
1796  * to figure out which policy wins.
1797  *
1798  * Returns a reference to a policy; caller must release this
1799  * reference when done.
1800  */
1801 ipsec_policy_t *
1802 ipsec_find_policy(int direction, conn_t *connp, ipsec_out_t *io,
1803     ipsec_selector_t *sel)
1804 {
1805 	ipsec_policy_t *p;
1806 	int selhash = selector_hash(sel);
1807 
1808 	p = ipsec_find_policy_head(NULL, &system_policy, direction, sel,
1809 	    selhash);
1810 	if ((connp != NULL) && (connp->conn_policy != NULL)) {
1811 		p = ipsec_find_policy_head(p, connp->conn_policy,
1812 		    direction, sel, selhash);
1813 	} else if ((io != NULL) && (io->ipsec_out_polhead != NULL)) {
1814 		p = ipsec_find_policy_head(p, io->ipsec_out_polhead,
1815 		    direction, sel, selhash);
1816 	}
1817 
1818 	return (p);
1819 }
1820 
/*
 * Check with global policy and see whether this inbound
 * packet meets the policy constraints.
 *
 * Locate appropriate policy from global policy, supplemented by the
 * conn's configured and/or cached policy if the conn is supplied.
 *
 * Dispatch to ipsec_check_ipsecin_policy if we have policy and an
 * encrypted packet to see if they match.
 *
 * Otherwise, see if the policy allows cleartext; if not, drop it on the
 * floor.
 *
 * first_mp is the M_CTL (ipsec_in_t) when mctl_present is true, otherwise
 * it is the data mblk itself.  Exactly one of ipha / ip6h must be non-NULL.
 *
 * Returns first_mp if the packet is accepted.  On rejection the packet is
 * handed to ip_drop_packet(), ipsecInFailed is bumped and NULL is
 * returned.
 */
mblk_t *
ipsec_check_global_policy(mblk_t *first_mp, conn_t *connp,
    ipha_t *ipha, ip6_t *ip6h, boolean_t mctl_present)
{
	ipsec_policy_t *p;
	ipsec_selector_t sel;
	queue_t *q = NULL;
	mblk_t *data_mp, *ipsec_mp;
	boolean_t policy_present;
	kstat_named_t *counter;

	data_mp = mctl_present ? first_mp->b_cont : first_mp;
	ipsec_mp = mctl_present ? first_mp : NULL;

	sel.ips_is_icmp_inv_acq = 0;

	ASSERT((ipha == NULL && ip6h != NULL) ||
	    (ip6h == NULL && ipha != NULL));

	/* Is there any global inbound policy for this address family? */
	if (ipha != NULL)
		policy_present = ipsec_inbound_v4_policy_present;
	else
		policy_present = ipsec_inbound_v6_policy_present;

	if (!policy_present && connp == NULL) {
		/*
		 * No global policy and no per-socket policy;
		 * just pass it back (but we shouldn't get here in that case)
		 */
		return (first_mp);
	}

	/* q is only used for logging; it stays NULL without a conn. */
	if (connp != NULL)
		q = CONNP_TO_WQ(connp);

	if (ipsec_mp != NULL) {
		ASSERT(ipsec_mp->b_datap->db_type == M_CTL);
		ASSERT(((ipsec_in_t *)ipsec_mp->b_rptr)->ipsec_in_type ==
		    IPSEC_IN);
	}

	/*
	 * If we have cached policy, use it.
	 * Otherwise consult system policy.
	 */
	if ((connp != NULL) && (connp->conn_latch != NULL)) {
		/*
		 * Latched connection: use its cached inbound policy (which
		 * may be NULL) and take our own hold on it.  The selector
		 * is not filled in on this path.
		 */
		p = connp->conn_latch->ipl_in_policy;
		if (p != NULL) {
			IPPOL_REFHOLD(p);
		}
	} else {
		/* Initialize the ports in the selector */
		if (!ipsec_init_inbound_sel(&sel, data_mp, ipha, ip6h)) {
			/*
			 * Technically not a policy mismatch, but it is
			 * an internal failure.
			 */
			ipsec_log_policy_failure(q, IPSEC_POLICY_MISMATCH,
			    "ipsec_init_inbound_sel", ipha, ip6h, B_FALSE);
			counter = &ipdrops_spd_nomem;
			goto fail;
		}

		/*
		 * Find the policy which best applies.
		 *
		 * If we find global policy, we should look at both
		 * local policy and global policy and see which is
		 * stronger and match accordingly.
		 *
		 * If we don't find a global policy, check with
		 * local policy alone.
		 */

		p = ipsec_find_policy(IPSEC_TYPE_INBOUND, connp, NULL, &sel);
	}

	if (p == NULL) {
		if (ipsec_mp == NULL) {
			/*
			 * We have no policy; default to succeeding.
			 * XXX paranoid system design doesn't do this.
			 */
			BUMP_MIB(&ip_mib, ipsecInSucceeded);
			return (first_mp);
		} else {
			/* Protected packet, but no policy asked for that. */
			counter = &ipdrops_spd_got_secure;
			ipsec_log_policy_failure(q, IPSEC_POLICY_NOT_NEEDED,
			    "ipsec_check_global_policy", ipha, ip6h, B_TRUE);
			goto fail;
		}
	}
	/*
	 * Protected packet with a policy: the detailed check (which also
	 * consumes our reference to p) decides.
	 */
	if (ipsec_mp != NULL)
		return (ipsec_check_ipsecin_policy(q, ipsec_mp, p, ipha, ip6h));
	/* Cleartext packet: accept only if the first action allows clear. */
	if (p->ipsp_act->ipa_allow_clear) {
		BUMP_MIB(&ip_mib, ipsecInSucceeded);
		IPPOL_REFRELE(p);
		return (first_mp);
	}
	IPPOL_REFRELE(p);
	/*
	 * If we reach here, we will drop the packet because it failed the
	 * global policy check because the packet was cleartext, and it
	 * should not have been.
	 */
	ipsec_log_policy_failure(q, IPSEC_POLICY_MISMATCH,
	    "ipsec_check_global_policy", ipha, ip6h, B_FALSE);
	counter = &ipdrops_spd_got_clear;

fail:
	/* Every path reaching here has set counter. */
	ip_drop_packet(first_mp, B_TRUE, NULL, NULL, counter, &spd_dropper);
	BUMP_MIB(&ip_mib, ipsecInFailed);
	return (NULL);
}
1948 
1949 /*
1950  * We check whether an inbound datagram is a valid one
1951  * to accept in clear. If it is secure, it is the job
1952  * of IPSEC to log information appropriately if it
1953  * suspects that it may not be the real one.
1954  *
1955  * It is called only while fanning out to the ULP
1956  * where ULP accepts only secure data and the incoming
1957  * is clear. Usually we never accept clear datagrams in
1958  * such cases. ICMP is the only exception.
1959  *
1960  * NOTE : We don't call this function if the client (ULP)
1961  * is willing to accept things in clear.
1962  */
1963 boolean_t
1964 ipsec_inbound_accept_clear(mblk_t *mp, ipha_t *ipha, ip6_t *ip6h)
1965 {
1966 	ushort_t iph_hdr_length;
1967 	icmph_t *icmph;
1968 	icmp6_t *icmp6;
1969 	uint8_t *nexthdrp;
1970 
1971 	ASSERT((ipha != NULL && ip6h == NULL) ||
1972 	    (ipha == NULL && ip6h != NULL));
1973 
1974 	if (ip6h != NULL) {
1975 		iph_hdr_length = ip_hdr_length_v6(mp, ip6h);
1976 		if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length,
1977 		    &nexthdrp)) {
1978 			return (B_FALSE);
1979 		}
1980 		if (*nexthdrp != IPPROTO_ICMPV6)
1981 			return (B_FALSE);
1982 		icmp6 = (icmp6_t *)(&mp->b_rptr[iph_hdr_length]);
1983 		/* Match IPv6 ICMP policy as closely as IPv4 as possible. */
1984 		switch (icmp6->icmp6_type) {
1985 		case ICMP6_PARAM_PROB:
1986 			/* Corresponds to port/proto unreach in IPv4. */
1987 		case ICMP6_ECHO_REQUEST:
1988 			/* Just like IPv4. */
1989 			return (B_FALSE);
1990 
1991 		case MLD_LISTENER_QUERY:
1992 		case MLD_LISTENER_REPORT:
1993 		case MLD_LISTENER_REDUCTION:
1994 			/*
1995 			 * XXX Seperate NDD in IPv4 what about here?
1996 			 * Plus, mcast is important to ND.
1997 			 */
1998 		case ICMP6_DST_UNREACH:
1999 			/* Corresponds to HOST/NET unreachable in IPv4. */
2000 		case ICMP6_PACKET_TOO_BIG:
2001 		case ICMP6_ECHO_REPLY:
2002 			/* These are trusted in IPv4. */
2003 		case ND_ROUTER_SOLICIT:
2004 		case ND_ROUTER_ADVERT:
2005 		case ND_NEIGHBOR_SOLICIT:
2006 		case ND_NEIGHBOR_ADVERT:
2007 		case ND_REDIRECT:
2008 			/* Trust ND messages for now. */
2009 		case ICMP6_TIME_EXCEEDED:
2010 		default:
2011 			return (B_TRUE);
2012 		}
2013 	} else {
2014 		/*
2015 		 * If it is not ICMP, fail this request.
2016 		 */
2017 		if (ipha->ipha_protocol != IPPROTO_ICMP)
2018 			return (B_FALSE);
2019 		iph_hdr_length = IPH_HDR_LENGTH(ipha);
2020 		icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length];
2021 		/*
2022 		 * It is an insecure icmp message. Check to see whether we are
2023 		 * willing to accept this one.
2024 		 */
2025 
2026 		switch (icmph->icmph_type) {
2027 		case ICMP_ECHO_REPLY:
2028 		case ICMP_TIME_STAMP_REPLY:
2029 		case ICMP_INFO_REPLY:
2030 		case ICMP_ROUTER_ADVERTISEMENT:
2031 			/*
2032 			 * We should not encourage clear replies if this
2033 			 * client expects secure. If somebody is replying
2034 			 * in clear some mailicious user watching both the
2035 			 * request and reply, can do chosen-plain-text attacks.
2036 			 * With global policy we might be just expecting secure
2037 			 * but sending out clear. We don't know what the right
2038 			 * thing is. We can't do much here as we can't control
2039 			 * the sender here. Till we are sure of what to do,
2040 			 * accept them.
2041 			 */
2042 			return (B_TRUE);
2043 		case ICMP_ECHO_REQUEST:
2044 		case ICMP_TIME_STAMP_REQUEST:
2045 		case ICMP_INFO_REQUEST:
2046 		case ICMP_ADDRESS_MASK_REQUEST:
2047 		case ICMP_ROUTER_SOLICITATION:
2048 		case ICMP_ADDRESS_MASK_REPLY:
2049 			/*
2050 			 * Don't accept this as somebody could be sending
2051 			 * us plain text to get encrypted data. If we reply,
2052 			 * it will lead to chosen plain text attack.
2053 			 */
2054 			return (B_FALSE);
2055 		case ICMP_DEST_UNREACHABLE:
2056 			switch (icmph->icmph_code) {
2057 			case ICMP_FRAGMENTATION_NEEDED:
2058 				/*
2059 				 * Be in sync with icmp_inbound, where we have
2060 				 * already set ire_max_frag.
2061 				 */
2062 				return (B_TRUE);
2063 			case ICMP_HOST_UNREACHABLE:
2064 			case ICMP_NET_UNREACHABLE:
2065 				/*
2066 				 * By accepting, we could reset a connection.
2067 				 * How do we solve the problem of some
2068 				 * intermediate router sending in-secure ICMP
2069 				 * messages ?
2070 				 */
2071 				return (B_TRUE);
2072 			case ICMP_PORT_UNREACHABLE:
2073 			case ICMP_PROTOCOL_UNREACHABLE:
2074 			default :
2075 				return (B_FALSE);
2076 			}
2077 		case ICMP_SOURCE_QUENCH:
2078 			/*
2079 			 * If this is an attack, TCP will slow start
2080 			 * because of this. Is it very harmful ?
2081 			 */
2082 			return (B_TRUE);
2083 		case ICMP_PARAM_PROBLEM:
2084 			return (B_FALSE);
2085 		case ICMP_TIME_EXCEEDED:
2086 			return (B_TRUE);
2087 		case ICMP_REDIRECT:
2088 			return (B_FALSE);
2089 		default :
2090 			return (B_FALSE);
2091 		}
2092 	}
2093 }
2094 
2095 void
2096 ipsec_latch_ids(ipsec_latch_t *ipl, ipsid_t *local, ipsid_t *remote)
2097 {
2098 	mutex_enter(&ipl->ipl_lock);
2099 
2100 	if (ipl->ipl_ids_latched) {
2101 		/* I lost, someone else got here before me */
2102 		mutex_exit(&ipl->ipl_lock);
2103 		return;
2104 	}
2105 
2106 	if (local != NULL)
2107 		IPSID_REFHOLD(local);
2108 	if (remote != NULL)
2109 		IPSID_REFHOLD(remote);
2110 
2111 	ipl->ipl_local_cid = local;
2112 	ipl->ipl_remote_cid = remote;
2113 	ipl->ipl_ids_latched = B_TRUE;
2114 	mutex_exit(&ipl->ipl_lock);
2115 }
2116 
2117 void
2118 ipsec_latch_inbound(ipsec_latch_t *ipl, ipsec_in_t *ii)
2119 {
2120 	ipsa_t *sa;
2121 
2122 	if (!ipl->ipl_ids_latched) {
2123 		ipsid_t *local = NULL;
2124 		ipsid_t *remote = NULL;
2125 
2126 		if (!ii->ipsec_in_loopback) {
2127 			if (ii->ipsec_in_esp_sa != NULL)
2128 				sa = ii->ipsec_in_esp_sa;
2129 			else
2130 				sa = ii->ipsec_in_ah_sa;
2131 			ASSERT(sa != NULL);
2132 			local = sa->ipsa_dst_cid;
2133 			remote = sa->ipsa_src_cid;
2134 		}
2135 		ipsec_latch_ids(ipl, local, remote);
2136 	}
2137 	ipl->ipl_in_action = ii->ipsec_in_action;
2138 	IPACT_REFHOLD(ipl->ipl_in_action);
2139 }
2140 
2141 /*
2142  * Check whether the policy constraints are met either for an
2143  * inbound datagram; called from IP in numerous places.
2144  *
2145  * Note that this is not a chokepoint for inbound policy checks;
2146  * see also ipsec_check_ipsecin_latch() and ipsec_check_global_policy()
2147  */
2148 mblk_t *
2149 ipsec_check_inbound_policy(mblk_t *first_mp, conn_t *connp,
2150     ipha_t *ipha, ip6_t *ip6h, boolean_t mctl_present)
2151 {
2152 	ipsec_in_t *ii;
2153 	boolean_t ret;
2154 	mblk_t *mp = mctl_present ? first_mp->b_cont : first_mp;
2155 	mblk_t *ipsec_mp = mctl_present ? first_mp : NULL;
2156 	ipsec_latch_t *ipl;
2157 
2158 	ASSERT(connp != NULL);
2159 	ipl = connp->conn_latch;
2160 
2161 	if (ipsec_mp == NULL) {
2162 		/*
2163 		 * This is the case where the incoming datagram is
2164 		 * cleartext and we need to see whether this client
2165 		 * would like to receive such untrustworthy things from
2166 		 * the wire.
2167 		 */
2168 		ASSERT(mp != NULL);
2169 
2170 		if (ipl != NULL) {
2171 			/*
2172 			 * Policy is cached in the conn.
2173 			 */
2174 			if ((ipl->ipl_in_policy != NULL) &&
2175 			    (!ipl->ipl_in_policy->ipsp_act->ipa_allow_clear)) {
2176 				ret = ipsec_inbound_accept_clear(mp,
2177 				    ipha, ip6h);
2178 				if (ret) {
2179 					BUMP_MIB(&ip_mib, ipsecInSucceeded);
2180 					return (first_mp);
2181 				} else {
2182 					ip_drop_packet(first_mp, B_TRUE, NULL,
2183 					    NULL, &ipdrops_spd_got_clear,
2184 					    &spd_dropper);
2185 					ipsec_log_policy_failure(
2186 					    CONNP_TO_WQ(connp),
2187 					    IPSEC_POLICY_MISMATCH,
2188 					    "ipsec_check_inbound_policy", ipha,
2189 					    ip6h, B_FALSE);
2190 					BUMP_MIB(&ip_mib, ipsecInFailed);
2191 					return (NULL);
2192 				}
2193 			} else {
2194 				BUMP_MIB(&ip_mib, ipsecInSucceeded);
2195 				return (first_mp);
2196 			}
2197 		} else {
2198 			/*
2199 			 * As this is a non-hardbound connection we need
2200 			 * to look at both per-socket policy and global
2201 			 * policy. As this is cleartext, mark the mp as
2202 			 * M_DATA in case if it is an ICMP error being
2203 			 * reported before calling ipsec_check_global_policy
2204 			 * so that it does not mistake it for IPSEC_IN.
2205 			 */
2206 			uchar_t db_type = mp->b_datap->db_type;
2207 			mp->b_datap->db_type = M_DATA;
2208 			first_mp = ipsec_check_global_policy(first_mp, connp,
2209 			    ipha, ip6h, mctl_present);
2210 			if (first_mp != NULL)
2211 				mp->b_datap->db_type = db_type;
2212 			return (first_mp);
2213 		}
2214 	}
2215 	/*
2216 	 * If it is inbound check whether the attached message
2217 	 * is secure or not. We have a special case for ICMP,
2218 	 * where we have a IPSEC_IN message and the attached
2219 	 * message is not secure. See icmp_inbound_error_fanout
2220 	 * for details.
2221 	 */
2222 	ASSERT(ipsec_mp != NULL);
2223 	ASSERT(ipsec_mp->b_datap->db_type == M_CTL);
2224 	ii = (ipsec_in_t *)ipsec_mp->b_rptr;
2225 
2226 	/*
2227 	 * mp->b_cont could be either a M_CTL message
2228 	 * for icmp errors being sent up or a M_DATA message.
2229 	 */
2230 	ASSERT(mp->b_datap->db_type == M_CTL ||
2231 	    mp->b_datap->db_type == M_DATA);
2232 
2233 	ASSERT(ii->ipsec_in_type == IPSEC_IN);
2234 
2235 	if (ipl == NULL) {
2236 		/*
2237 		 * We don't have policies cached in the conn
2238 		 * for this stream. So, look at the global
2239 		 * policy. It will check against conn or global
2240 		 * depending on whichever is stronger.
2241 		 */
2242 		return (ipsec_check_global_policy(first_mp, connp,
2243 		    ipha, ip6h, mctl_present));
2244 	}
2245 
2246 	if (ipl->ipl_in_action != NULL) {
2247 		/* Policy is cached & latched; fast(er) path */
2248 		const char *reason;
2249 		kstat_named_t *counter;
2250 		if (ipsec_check_ipsecin_latch(ii, mp, ipl,
2251 		    ipha, ip6h, &reason, &counter)) {
2252 			BUMP_MIB(&ip_mib, ipsecInSucceeded);
2253 			return (first_mp);
2254 		}
2255 		(void) mi_strlog(CONNP_TO_WQ(connp), 0,
2256 		    SL_ERROR|SL_WARN|SL_CONSOLE,
2257 		    "ipsec inbound policy mismatch: %s, packet dropped\n",
2258 		    reason);
2259 		ip_drop_packet(first_mp, B_TRUE, NULL, NULL, counter,
2260 		    &spd_dropper);
2261 		BUMP_MIB(&ip_mib, ipsecInFailed);
2262 		return (NULL);
2263 	} else if (ipl->ipl_in_policy == NULL) {
2264 		ipsec_weird_null_inbound_policy++;
2265 		return (first_mp);
2266 	}
2267 
2268 	IPPOL_REFHOLD(ipl->ipl_in_policy);
2269 	first_mp = ipsec_check_ipsecin_policy(CONNP_TO_WQ(connp), first_mp,
2270 	    ipl->ipl_in_policy, ipha, ip6h);
2271 	/*
2272 	 * NOTE: ipsecIn{Failed,Succeeeded} bumped by
2273 	 * ipsec_check_ipsecin_policy().
2274 	 */
2275 	if (first_mp != NULL)
2276 		ipsec_latch_inbound(ipl, ii);
2277 	return (first_mp);
2278 }
2279 
2280 boolean_t
2281 ipsec_init_inbound_sel(ipsec_selector_t *sel, mblk_t *mp,
2282     ipha_t *ipha, ip6_t *ip6h)
2283 {
2284 	uint16_t *ports;
2285 	ushort_t hdr_len;
2286 	mblk_t *spare_mp = NULL;
2287 	uint8_t *nexthdrp;
2288 	uint8_t nexthdr;
2289 	uint8_t *typecode;
2290 	uint8_t check_proto;
2291 
2292 	ASSERT((ipha == NULL && ip6h != NULL) ||
2293 	    (ipha != NULL && ip6h == NULL));
2294 
2295 	if (ip6h != NULL) {
2296 		check_proto = IPPROTO_ICMPV6;
2297 		sel->ips_isv4 = B_FALSE;
2298 		sel->ips_local_addr_v6 = ip6h->ip6_dst;
2299 		sel->ips_remote_addr_v6 = ip6h->ip6_src;
2300 
2301 		nexthdr = ip6h->ip6_nxt;
2302 		switch (nexthdr) {
2303 		case IPPROTO_HOPOPTS:
2304 		case IPPROTO_ROUTING:
2305 		case IPPROTO_DSTOPTS:
2306 			/*
2307 			 * Use ip_hdr_length_nexthdr_v6().  And have a spare
2308 			 * mblk that's contiguous to feed it
2309 			 */
2310 			if ((spare_mp = msgpullup(mp, -1)) == NULL)
2311 				return (B_FALSE);
2312 			if (!ip_hdr_length_nexthdr_v6(spare_mp,
2313 			    (ip6_t *)spare_mp->b_rptr, &hdr_len, &nexthdrp)) {
2314 				/* Malformed packet - XXX ip_drop_packet()? */
2315 				freemsg(spare_mp);
2316 				return (B_FALSE);
2317 			}
2318 			nexthdr = *nexthdrp;
2319 			/* We can just extract based on hdr_len now. */
2320 			break;
2321 		default:
2322 			hdr_len = IPV6_HDR_LEN;
2323 			break;
2324 		}
2325 	} else {
2326 		check_proto = IPPROTO_ICMP;
2327 		sel->ips_isv4 = B_TRUE;
2328 		sel->ips_local_addr_v4 = ipha->ipha_dst;
2329 		sel->ips_remote_addr_v4 = ipha->ipha_src;
2330 		nexthdr = ipha->ipha_protocol;
2331 		hdr_len = IPH_HDR_LENGTH(ipha);
2332 	}
2333 	sel->ips_protocol = nexthdr;
2334 
2335 	if (nexthdr != IPPROTO_TCP && nexthdr != IPPROTO_UDP &&
2336 	    nexthdr != IPPROTO_SCTP && nexthdr != check_proto) {
2337 		sel->ips_remote_port = sel->ips_local_port = 0;
2338 		freemsg(spare_mp);	/* Always works, even if NULL. */
2339 		return (B_TRUE);
2340 	}
2341 
2342 	if (&mp->b_rptr[hdr_len] + 4 > mp->b_wptr) {
2343 		/* If we didn't pullup a copy already, do so now. */
2344 		/*
2345 		 * XXX performance, will upper-layers frequently split TCP/UDP
2346 		 * apart from IP or options?  If so, perhaps we should revisit
2347 		 * the spare_mp strategy.
2348 		 */
2349 		ipsec_hdr_pullup_needed++;
2350 		if (spare_mp == NULL &&
2351 		    (spare_mp = msgpullup(mp, -1)) == NULL) {
2352 			return (B_FALSE);
2353 		}
2354 		ports = (uint16_t *)&spare_mp->b_rptr[hdr_len];
2355 	} else {
2356 		ports = (uint16_t *)&mp->b_rptr[hdr_len];
2357 	}
2358 
2359 	if (nexthdr == check_proto) {
2360 		typecode = (uint8_t *)ports;
2361 		sel->ips_icmp_type = *typecode++;
2362 		sel->ips_icmp_code = *typecode;
2363 		sel->ips_remote_port = sel->ips_local_port = 0;
2364 		freemsg(spare_mp);	/* Always works, even if NULL */
2365 		return (B_TRUE);
2366 	}
2367 
2368 	sel->ips_remote_port = *ports++;
2369 	sel->ips_local_port = *ports;
2370 	freemsg(spare_mp);	/* Always works, even if NULL */
2371 	return (B_TRUE);
2372 }
2373 
2374 static boolean_t
2375 ipsec_init_outbound_ports(ipsec_selector_t *sel, mblk_t *mp, ipha_t *ipha,
2376     ip6_t *ip6h)
2377 {
2378 	/*
2379 	 * XXX cut&paste shared with ipsec_init_inbound_sel
2380 	 */
2381 	uint16_t *ports;
2382 	ushort_t hdr_len;
2383 	mblk_t *spare_mp = NULL;
2384 	uint8_t *nexthdrp;
2385 	uint8_t nexthdr;
2386 	uint8_t *typecode;
2387 	uint8_t check_proto;
2388 
2389 	ASSERT((ipha == NULL && ip6h != NULL) ||
2390 	    (ipha != NULL && ip6h == NULL));
2391 
2392 	if (ip6h != NULL) {
2393 		check_proto = IPPROTO_ICMPV6;
2394 		nexthdr = ip6h->ip6_nxt;
2395 		switch (nexthdr) {
2396 		case IPPROTO_HOPOPTS:
2397 		case IPPROTO_ROUTING:
2398 		case IPPROTO_DSTOPTS:
2399 			/*
2400 			 * Use ip_hdr_length_nexthdr_v6().  And have a spare
2401 			 * mblk that's contiguous to feed it
2402 			 */
2403 			spare_mp = msgpullup(mp, -1);
2404 			if (spare_mp == NULL ||
2405 			    !ip_hdr_length_nexthdr_v6(spare_mp,
2406 				(ip6_t *)spare_mp->b_rptr, &hdr_len,
2407 				&nexthdrp)) {
2408 				/* Always works, even if NULL. */
2409 				freemsg(spare_mp);
2410 				freemsg(mp);
2411 				return (B_FALSE);
2412 			} else {
2413 				nexthdr = *nexthdrp;
2414 				/* We can just extract based on hdr_len now. */
2415 			}
2416 			break;
2417 		default:
2418 			hdr_len = IPV6_HDR_LEN;
2419 			break;
2420 		}
2421 	} else {
2422 		check_proto = IPPROTO_ICMP;
2423 		hdr_len = IPH_HDR_LENGTH(ipha);
2424 		nexthdr = ipha->ipha_protocol;
2425 	}
2426 
2427 	sel->ips_protocol = nexthdr;
2428 	if (nexthdr != IPPROTO_TCP && nexthdr != IPPROTO_UDP &&
2429 	    nexthdr != IPPROTO_SCTP && nexthdr != check_proto) {
2430 		sel->ips_local_port = sel->ips_remote_port = 0;
2431 		freemsg(spare_mp);  /* Always works, even if NULL. */
2432 		return (B_TRUE);
2433 	}
2434 
2435 	if (&mp->b_rptr[hdr_len] + 4 > mp->b_wptr) {
2436 		/* If we didn't pullup a copy already, do so now. */
2437 		/*
2438 		 * XXX performance, will upper-layers frequently split TCP/UDP
2439 		 * apart from IP or options?  If so, perhaps we should revisit
2440 		 * the spare_mp strategy.
2441 		 *
2442 		 * XXX should this be msgpullup(mp, hdr_len+4) ???
2443 		 */
2444 		if (spare_mp == NULL &&
2445 		    (spare_mp = msgpullup(mp, -1)) == NULL) {
2446 			freemsg(mp);
2447 			return (B_FALSE);
2448 		}
2449 		ports = (uint16_t *)&spare_mp->b_rptr[hdr_len];
2450 	} else {
2451 		ports = (uint16_t *)&mp->b_rptr[hdr_len];
2452 	}
2453 
2454 	if (nexthdr == check_proto) {
2455 		typecode = (uint8_t *)ports;
2456 		sel->ips_icmp_type = *typecode++;
2457 		sel->ips_icmp_code = *typecode;
2458 		sel->ips_remote_port = sel->ips_local_port = 0;
2459 		freemsg(spare_mp);	/* Always works, even if NULL */
2460 		return (B_TRUE);
2461 	}
2462 
2463 	sel->ips_local_port = *ports++;
2464 	sel->ips_remote_port = *ports;
2465 	freemsg(spare_mp);	/* Always works, even if NULL */
2466 	return (B_TRUE);
2467 }
2468 
2469 /*
2470  * Create an ipsec_action_t based on the way an inbound packet was protected.
2471  * Used to reflect traffic back to a sender.
2472  *
2473  * We don't bother interning the action into the hash table.
2474  */
2475 ipsec_action_t *
2476 ipsec_in_to_out_action(ipsec_in_t *ii)
2477 {
2478 	ipsa_t *ah_assoc, *esp_assoc;
2479 	uint_t auth_alg = 0, encr_alg = 0, espa_alg = 0;
2480 	ipsec_action_t *ap;
2481 	boolean_t unique;
2482 
2483 	ap = kmem_cache_alloc(ipsec_action_cache, KM_NOSLEEP);
2484 
2485 	if (ap == NULL)
2486 		return (NULL);
2487 
2488 	bzero(ap, sizeof (*ap));
2489 	HASH_NULL(ap, ipa_hash);
2490 	ap->ipa_next = NULL;
2491 	ap->ipa_refs = 1;
2492 
2493 	/*
2494 	 * Get the algorithms that were used for this packet.
2495 	 */
2496 	ap->ipa_act.ipa_type = IPSEC_ACT_APPLY;
2497 	ap->ipa_act.ipa_log = 0;
2498 	ah_assoc = ii->ipsec_in_ah_sa;
2499 	ap->ipa_act.ipa_apply.ipp_use_ah = (ah_assoc != NULL);
2500 
2501 	esp_assoc = ii->ipsec_in_esp_sa;
2502 	ap->ipa_act.ipa_apply.ipp_use_esp = (esp_assoc != NULL);
2503 
2504 	if (esp_assoc != NULL) {
2505 		encr_alg = esp_assoc->ipsa_encr_alg;
2506 		espa_alg = esp_assoc->ipsa_auth_alg;
2507 		ap->ipa_act.ipa_apply.ipp_use_espa = (espa_alg != 0);
2508 	}
2509 	if (ah_assoc != NULL)
2510 		auth_alg = ah_assoc->ipsa_auth_alg;
2511 
2512 	ap->ipa_act.ipa_apply.ipp_encr_alg = (uint8_t)encr_alg;
2513 	ap->ipa_act.ipa_apply.ipp_auth_alg = (uint8_t)auth_alg;
2514 	ap->ipa_act.ipa_apply.ipp_esp_auth_alg = (uint8_t)espa_alg;
2515 	ap->ipa_act.ipa_apply.ipp_use_se = ii->ipsec_in_decaps;
2516 	unique = B_FALSE;
2517 
2518 	if (esp_assoc != NULL) {
2519 		ap->ipa_act.ipa_apply.ipp_espa_minbits =
2520 		    esp_assoc->ipsa_authkeybits;
2521 		ap->ipa_act.ipa_apply.ipp_espa_maxbits =
2522 		    esp_assoc->ipsa_authkeybits;
2523 		ap->ipa_act.ipa_apply.ipp_espe_minbits =
2524 		    esp_assoc->ipsa_encrkeybits;
2525 		ap->ipa_act.ipa_apply.ipp_espe_maxbits =
2526 		    esp_assoc->ipsa_encrkeybits;
2527 		ap->ipa_act.ipa_apply.ipp_km_proto = esp_assoc->ipsa_kmp;
2528 		ap->ipa_act.ipa_apply.ipp_km_cookie = esp_assoc->ipsa_kmc;
2529 		if (esp_assoc->ipsa_flags & IPSA_F_UNIQUE)
2530 			unique = B_TRUE;
2531 	}
2532 	if (ah_assoc != NULL) {
2533 		ap->ipa_act.ipa_apply.ipp_ah_minbits =
2534 		    ah_assoc->ipsa_authkeybits;
2535 		ap->ipa_act.ipa_apply.ipp_ah_maxbits =
2536 		    ah_assoc->ipsa_authkeybits;
2537 		ap->ipa_act.ipa_apply.ipp_km_proto = ah_assoc->ipsa_kmp;
2538 		ap->ipa_act.ipa_apply.ipp_km_cookie = ah_assoc->ipsa_kmc;
2539 		if (ah_assoc->ipsa_flags & IPSA_F_UNIQUE)
2540 			unique = B_TRUE;
2541 	}
2542 	ap->ipa_act.ipa_apply.ipp_use_unique = unique;
2543 	ap->ipa_want_unique = unique;
2544 	ap->ipa_allow_clear = B_FALSE;
2545 	ap->ipa_want_se = ii->ipsec_in_decaps;
2546 	ap->ipa_want_ah = (ah_assoc != NULL);
2547 	ap->ipa_want_esp = (esp_assoc != NULL);
2548 
2549 	ap->ipa_ovhd = ipsec_act_ovhd(&ap->ipa_act);
2550 
2551 	ap->ipa_act.ipa_apply.ipp_replay_depth = 0; /* don't care */
2552 
2553 	return (ap);
2554 }
2555 
2556 
2557 /*
2558  * Compute the worst-case amount of extra space required by an action.
2559  * Note that, because of the ESP considerations listed below, this is
2560  * actually not the same as the best-case reduction in the MTU; in the
2561  * future, we should pass additional information to this function to
2562  * allow the actual MTU impact to be computed.
2563  *
2564  * AH: Revisit this if we implement algorithms with
2565  * a verifier size of more than 12 bytes.
2566  *
2567  * ESP: A more exact but more messy computation would take into
2568  * account the interaction between the cipher block size and the
2569  * effective MTU, yielding the inner payload size which reflects a
2570  * packet with *minimum* ESP padding..
2571  */
2572 static int32_t
2573 ipsec_act_ovhd(const ipsec_act_t *act)
2574 {
2575 	int32_t overhead = 0;
2576 
2577 	if (act->ipa_type == IPSEC_ACT_APPLY) {
2578 		const ipsec_prot_t *ipp = &act->ipa_apply;
2579 
2580 		if (ipp->ipp_use_ah)
2581 			overhead += IPSEC_MAX_AH_HDR_SIZE;
2582 		if (ipp->ipp_use_esp) {
2583 			overhead += IPSEC_MAX_ESP_HDR_SIZE;
2584 			overhead += sizeof (struct udphdr);
2585 		}
2586 		if (ipp->ipp_use_se)
2587 			overhead += IP_SIMPLE_HDR_LENGTH;
2588 	}
2589 	return (overhead);
2590 }
2591 
/*
 * Placeholder hash used when interning policy structures.  Every input
 * currently lands in bucket 0.  It is only used while creating policies,
 * so it is not performance-critical for packet flows.
 *
 * Future work: canonicalize the structures hashed with this (i.e.,
 * zeroize padding) so that a real hash over [start, end) works correctly.
 */
/* ARGSUSED */
static uint32_t
policy_hash(int size, const void *start, const void *end)
{
	const uint32_t bucket = 0;

	return (bucket);
}
2605 
2606 
/*
 * Hash function macros for each address type.
 *
 * Both reduce a 32-bit quantity modulo ipsec_spd_hashsize: the IPv4
 * address itself, or the low order 32 bits of the IPv6 address.
 *
 * The IPV6 hash function assumes that the low order 32-bits of the
 * address (typically containing the low order 24 bits of the mac
 * address) are reasonably well-distributed.  Revisit this if we run
 * into trouble from lots of collisions on ::1 addresses and the like
 * (seems unlikely).
 *
 * The argument is parenthesized so that expressions such as *ptr can be
 * passed safely.
 */
#define	IPSEC_IPV4_HASH(a) ((a) % ipsec_spd_hashsize)
#define	IPSEC_IPV6_HASH(a) (((a).s6_addr32[3]) % ipsec_spd_hashsize)
2618 
2619 /*
2620  * These two hash functions should produce coordinated values
2621  * but have slightly different roles.
2622  */
2623 static uint32_t
2624 selkey_hash(const ipsec_selkey_t *selkey)
2625 {
2626 	uint32_t valid = selkey->ipsl_valid;
2627 
2628 	if (!(valid & IPSL_REMOTE_ADDR))
2629 		return (IPSEC_SEL_NOHASH);
2630 
2631 	if (valid & IPSL_IPV4) {
2632 		if (selkey->ipsl_remote_pfxlen == 32)
2633 			return (IPSEC_IPV4_HASH(selkey->ipsl_remote.ipsad_v4));
2634 	}
2635 	if (valid & IPSL_IPV6) {
2636 		if (selkey->ipsl_remote_pfxlen == 128)
2637 			return (IPSEC_IPV6_HASH(selkey->ipsl_remote.ipsad_v6));
2638 	}
2639 	return (IPSEC_SEL_NOHASH);
2640 }
2641 
2642 static uint32_t
2643 selector_hash(ipsec_selector_t *sel)
2644 {
2645 	if (sel->ips_isv4) {
2646 		return (IPSEC_IPV4_HASH(sel->ips_remote_addr_v4));
2647 	}
2648 	return (IPSEC_IPV6_HASH(sel->ips_remote_addr_v6));
2649 }
2650 
2651 /*
2652  * Intern actions into the action hash table.
2653  */
2654 ipsec_action_t *
2655 ipsec_act_find(const ipsec_act_t *a, int n)
2656 {
2657 	int i;
2658 	uint32_t hval;
2659 	ipsec_action_t *ap;
2660 	ipsec_action_t *prev = NULL;
2661 	int32_t overhead, maxovhd = 0;
2662 	boolean_t allow_clear = B_FALSE;
2663 	boolean_t want_ah = B_FALSE;
2664 	boolean_t want_esp = B_FALSE;
2665 	boolean_t want_se = B_FALSE;
2666 	boolean_t want_unique = B_FALSE;
2667 
2668 	/*
2669 	 * TODO: should canonicalize a[] (i.e., zeroize any padding)
2670 	 * so we can use a non-trivial policy_hash function.
2671 	 */
2672 	for (i = n-1; i >= 0; i--) {
2673 		hval = policy_hash(IPSEC_ACTION_HASH_SIZE, &a[i], &a[n]);
2674 
2675 		HASH_LOCK(ipsec_action_hash, hval);
2676 
2677 		for (HASH_ITERATE(ap, ipa_hash, ipsec_action_hash, hval)) {
2678 			if (bcmp(&ap->ipa_act, &a[i], sizeof (*a)) != 0)
2679 				continue;
2680 			if (ap->ipa_next != prev)
2681 				continue;
2682 			break;
2683 		}
2684 		if (ap != NULL) {
2685 			HASH_UNLOCK(ipsec_action_hash, hval);
2686 			prev = ap;
2687 			continue;
2688 		}
2689 		/*
2690 		 * need to allocate a new one..
2691 		 */
2692 		ap = kmem_cache_alloc(ipsec_action_cache, KM_NOSLEEP);
2693 		if (ap == NULL) {
2694 			HASH_UNLOCK(ipsec_action_hash, hval);
2695 			if (prev != NULL)
2696 				ipsec_action_free(prev);
2697 			return (NULL);
2698 		}
2699 		HASH_INSERT(ap, ipa_hash, ipsec_action_hash, hval);
2700 
2701 		ap->ipa_next = prev;
2702 		ap->ipa_act = a[i];
2703 
2704 		overhead = ipsec_act_ovhd(&a[i]);
2705 		if (maxovhd < overhead)
2706 			maxovhd = overhead;
2707 
2708 		if ((a[i].ipa_type == IPSEC_ACT_BYPASS) ||
2709 		    (a[i].ipa_type == IPSEC_ACT_CLEAR))
2710 			allow_clear = B_TRUE;
2711 		if (a[i].ipa_type == IPSEC_ACT_APPLY) {
2712 			const ipsec_prot_t *ipp = &a[i].ipa_apply;
2713 
2714 			ASSERT(ipp->ipp_use_ah || ipp->ipp_use_esp);
2715 			want_ah |= ipp->ipp_use_ah;
2716 			want_esp |= ipp->ipp_use_esp;
2717 			want_se |= ipp->ipp_use_se;
2718 			want_unique |= ipp->ipp_use_unique;
2719 		}
2720 		ap->ipa_allow_clear = allow_clear;
2721 		ap->ipa_want_ah = want_ah;
2722 		ap->ipa_want_esp = want_esp;
2723 		ap->ipa_want_se = want_se;
2724 		ap->ipa_want_unique = want_unique;
2725 		ap->ipa_refs = 1; /* from the hash table */
2726 		ap->ipa_ovhd = maxovhd;
2727 		if (prev)
2728 			prev->ipa_refs++;
2729 		prev = ap;
2730 		HASH_UNLOCK(ipsec_action_hash, hval);
2731 	}
2732 
2733 	ap->ipa_refs++;		/* caller's reference */
2734 
2735 	return (ap);
2736 }
2737 
2738 /*
2739  * Called when refcount goes to 0, indicating that all references to this
2740  * node are gone.
2741  *
2742  * This does not unchain the action from the hash table.
2743  */
2744 void
2745 ipsec_action_free(ipsec_action_t *ap)
2746 {
2747 	for (;;) {
2748 		ipsec_action_t *np = ap->ipa_next;
2749 		ASSERT(ap->ipa_refs == 0);
2750 		ASSERT(ap->ipa_hash.hash_pp == NULL);
2751 		kmem_cache_free(ipsec_action_cache, ap);
2752 		ap = np;
2753 		/* Inlined IPACT_REFRELE -- avoid recursion */
2754 		if (ap == NULL)
2755 			break;
2756 		membar_exit();
2757 		if (atomic_add_32_nv(&(ap)->ipa_refs, -1) != 0)
2758 			break;
2759 		/* End inlined IPACT_REFRELE */
2760 	}
2761 }
2762 
2763 /*
2764  * Periodically sweep action hash table for actions with refcount==1, and
2765  * nuke them.  We cannot do this "on demand" (i.e., from IPACT_REFRELE)
2766  * because we can't close the race between another thread finding the action
2767  * in the hash table without holding the bucket lock during IPACT_REFRELE.
2768  * Instead, we run this function sporadically to clean up after ourselves;
2769  * we also set it as the "reclaim" function for the action kmem_cache.
2770  *
2771  * Note that it may take several passes of ipsec_action_gc() to free all
2772  * "stale" actions.
2773  */
2774 /* ARGSUSED */
2775 static void
2776 ipsec_action_reclaim(void *dummy)
2777 {
2778 	int i;
2779 
2780 	for (i = 0; i < IPSEC_ACTION_HASH_SIZE; i++) {
2781 		ipsec_action_t *ap, *np;
2782 
2783 		/* skip the lock if nobody home */
2784 		if (ipsec_action_hash[i].hash_head == NULL)
2785 			continue;
2786 
2787 		HASH_LOCK(ipsec_action_hash, i);
2788 		for (ap = ipsec_action_hash[i].hash_head;
2789 		    ap != NULL; ap = np) {
2790 			ASSERT(ap->ipa_refs > 0);
2791 			np = ap->ipa_hash.hash_next;
2792 			if (ap->ipa_refs > 1)
2793 				continue;
2794 			HASH_UNCHAIN(ap, ipa_hash, ipsec_action_hash, i);
2795 			IPACT_REFRELE(ap);
2796 		}
2797 		HASH_UNLOCK(ipsec_action_hash, i);
2798 	}
2799 }
2800 
2801 /*
2802  * Intern a selector set into the selector set hash table.
2803  * This is simpler than the actions case..
2804  */
2805 static ipsec_sel_t *
2806 ipsec_find_sel(ipsec_selkey_t *selkey)
2807 {
2808 	ipsec_sel_t *sp;
2809 	uint32_t hval, bucket;
2810 
2811 	/*
2812 	 * Exactly one AF bit should be set in selkey.
2813 	 */
2814 	ASSERT(!(selkey->ipsl_valid & IPSL_IPV4) ^
2815 	    !(selkey->ipsl_valid & IPSL_IPV6));
2816 
2817 	hval = selkey_hash(selkey);
2818 	selkey->ipsl_hval = hval;
2819 
2820 	bucket = (hval == IPSEC_SEL_NOHASH) ? 0 : hval;
2821 
2822 	ASSERT(!HASH_LOCKED(ipsec_sel_hash, bucket));
2823 	HASH_LOCK(ipsec_sel_hash, bucket);
2824 
2825 	for (HASH_ITERATE(sp, ipsl_hash, ipsec_sel_hash, bucket)) {
2826 		if (bcmp(&sp->ipsl_key, selkey, sizeof (*selkey)) == 0)
2827 			break;
2828 	}
2829 	if (sp != NULL) {
2830 		sp->ipsl_refs++;
2831 
2832 		HASH_UNLOCK(ipsec_sel_hash, bucket);
2833 		return (sp);
2834 	}
2835 
2836 	sp = kmem_cache_alloc(ipsec_sel_cache, KM_NOSLEEP);
2837 	if (sp == NULL) {
2838 		HASH_UNLOCK(ipsec_sel_hash, bucket);
2839 		return (NULL);
2840 	}
2841 
2842 	HASH_INSERT(sp, ipsl_hash, ipsec_sel_hash, bucket);
2843 	sp->ipsl_refs = 2;	/* one for hash table, one for caller */
2844 	sp->ipsl_key = *selkey;
2845 
2846 	HASH_UNLOCK(ipsec_sel_hash, bucket);
2847 
2848 	return (sp);
2849 }
2850 
2851 static void
2852 ipsec_sel_rel(ipsec_sel_t **spp)
2853 {
2854 	ipsec_sel_t *sp = *spp;
2855 	int hval = sp->ipsl_key.ipsl_hval;
2856 	*spp = NULL;
2857 
2858 	if (hval == IPSEC_SEL_NOHASH)
2859 		hval = 0;
2860 
2861 	ASSERT(!HASH_LOCKED(ipsec_sel_hash, hval));
2862 	HASH_LOCK(ipsec_sel_hash, hval);
2863 	if (--sp->ipsl_refs == 1) {
2864 		HASH_UNCHAIN(sp, ipsl_hash, ipsec_sel_hash, hval);
2865 		sp->ipsl_refs--;
2866 		HASH_UNLOCK(ipsec_sel_hash, hval);
2867 		ASSERT(sp->ipsl_refs == 0);
2868 		kmem_cache_free(ipsec_sel_cache, sp);
2869 		/* Caller unlocks */
2870 		return;
2871 	}
2872 
2873 	HASH_UNLOCK(ipsec_sel_hash, hval);
2874 }
2875 
2876 /*
2877  * Free a policy rule which we know is no longer being referenced.
2878  */
2879 void
2880 ipsec_policy_free(ipsec_policy_t *ipp)
2881 {
2882 	ASSERT(ipp->ipsp_refs == 0);
2883 	ASSERT(ipp->ipsp_sel != NULL);
2884 	ASSERT(ipp->ipsp_act != NULL);
2885 	ipsec_sel_rel(&ipp->ipsp_sel);
2886 	IPACT_REFRELE(ipp->ipsp_act);
2887 	kmem_cache_free(ipsec_pol_cache, ipp);
2888 }
2889 
2890 /*
2891  * Construction of new policy rules; construct a policy, and add it to
2892  * the appropriate tables.
2893  */
2894 ipsec_policy_t *
2895 ipsec_policy_create(ipsec_selkey_t *keys, const ipsec_act_t *a,
2896     int nacts, int prio)
2897 {
2898 	ipsec_action_t *ap;
2899 	ipsec_sel_t *sp;
2900 	ipsec_policy_t *ipp;
2901 
2902 	ipp = kmem_cache_alloc(ipsec_pol_cache, KM_NOSLEEP);
2903 	ap = ipsec_act_find(a, nacts);
2904 	sp = ipsec_find_sel(keys);
2905 
2906 	if ((ap == NULL) || (sp == NULL) || (ipp == NULL)) {
2907 		if (ap != NULL) {
2908 			IPACT_REFRELE(ap);
2909 		}
2910 		if (sp != NULL)
2911 			ipsec_sel_rel(&sp);
2912 		if (ipp != NULL)
2913 			kmem_cache_free(ipsec_pol_cache, ipp);
2914 		return (NULL);
2915 	}
2916 
2917 	HASH_NULL(ipp, ipsp_hash);
2918 
2919 	ipp->ipsp_refs = 1;	/* caller's reference */
2920 	ipp->ipsp_sel = sp;
2921 	ipp->ipsp_act = ap;
2922 	ipp->ipsp_prio = prio;	/* rule priority */
2923 	ipp->ipsp_index = ipsec_next_policy_index++;
2924 
2925 	return (ipp);
2926 }
2927 
2928 static void
2929 ipsec_update_present_flags()
2930 {
2931 	boolean_t hashpol = (avl_numnodes(&system_policy.iph_rulebyid) > 0);
2932 
2933 	if (hashpol) {
2934 		ipsec_outbound_v4_policy_present = B_TRUE;
2935 		ipsec_outbound_v6_policy_present = B_TRUE;
2936 		ipsec_inbound_v4_policy_present = B_TRUE;
2937 		ipsec_inbound_v6_policy_present = B_TRUE;
2938 		return;
2939 	}
2940 
2941 	ipsec_outbound_v4_policy_present = (NULL !=
2942 	    system_policy.iph_root[IPSEC_TYPE_OUTBOUND].
2943 	    ipr_nonhash[IPSEC_AF_V4]);
2944 	ipsec_outbound_v6_policy_present = (NULL !=
2945 	    system_policy.iph_root[IPSEC_TYPE_OUTBOUND].
2946 	    ipr_nonhash[IPSEC_AF_V6]);
2947 	ipsec_inbound_v4_policy_present = (NULL !=
2948 	    system_policy.iph_root[IPSEC_TYPE_INBOUND].
2949 	    ipr_nonhash[IPSEC_AF_V4]);
2950 	ipsec_inbound_v6_policy_present = (NULL !=
2951 	    system_policy.iph_root[IPSEC_TYPE_INBOUND].
2952 	    ipr_nonhash[IPSEC_AF_V6]);
2953 }
2954 
2955 boolean_t
2956 ipsec_policy_delete(ipsec_policy_head_t *php, ipsec_selkey_t *keys, int dir)
2957 {
2958 	ipsec_sel_t *sp;
2959 	ipsec_policy_t *ip, *nip, *head;
2960 	int af;
2961 	ipsec_policy_root_t *pr = &php->iph_root[dir];
2962 
2963 	sp = ipsec_find_sel(keys);
2964 
2965 	if (sp == NULL)
2966 		return (B_FALSE);
2967 
2968 	af = (sp->ipsl_key.ipsl_valid & IPSL_IPV4) ? IPSEC_AF_V4 : IPSEC_AF_V6;
2969 
2970 	rw_enter(&php->iph_lock, RW_WRITER);
2971 
2972 	if (keys->ipsl_hval == IPSEC_SEL_NOHASH) {
2973 		head = pr->ipr_nonhash[af];
2974 	} else {
2975 		head = pr->ipr_hash[keys->ipsl_hval].hash_head;
2976 	}
2977 
2978 	for (ip = head; ip != NULL; ip = nip) {
2979 		nip = ip->ipsp_hash.hash_next;
2980 		if (ip->ipsp_sel != sp) {
2981 			continue;
2982 		}
2983 
2984 		IPPOL_UNCHAIN(php, ip);
2985 
2986 		php->iph_gen++;
2987 		ipsec_update_present_flags();
2988 
2989 		rw_exit(&php->iph_lock);
2990 
2991 		ipsec_sel_rel(&sp);
2992 
2993 		return (B_TRUE);
2994 	}
2995 
2996 	rw_exit(&php->iph_lock);
2997 	ipsec_sel_rel(&sp);
2998 	return (B_FALSE);
2999 }
3000 
3001 int
3002 ipsec_policy_delete_index(ipsec_policy_head_t *php, uint64_t policy_index)
3003 {
3004 	boolean_t found = B_FALSE;
3005 	ipsec_policy_t ipkey;
3006 	ipsec_policy_t *ip;
3007 	avl_index_t where;
3008 
3009 	(void) memset(&ipkey, 0, sizeof (ipkey));
3010 	ipkey.ipsp_index = policy_index;
3011 
3012 	rw_enter(&php->iph_lock, RW_WRITER);
3013 
3014 	/*
3015 	 * We could be cleverer here about the walk.
3016 	 * but well, (k+1)*log(N) will do for now (k==number of matches,
3017 	 * N==number of table entries
3018 	 */
3019 	for (;;) {
3020 		ip = (ipsec_policy_t *)avl_find(&php->iph_rulebyid,
3021 		    (void *)&ipkey, &where);
3022 		ASSERT(ip == NULL);
3023 
3024 		ip = avl_nearest(&php->iph_rulebyid, where, AVL_AFTER);
3025 
3026 		if (ip == NULL)
3027 			break;
3028 
3029 		if (ip->ipsp_index != policy_index) {
3030 			ASSERT(ip->ipsp_index > policy_index);
3031 			break;
3032 		}
3033 
3034 		IPPOL_UNCHAIN(php, ip);
3035 		found = B_TRUE;
3036 	}
3037 
3038 	if (found) {
3039 		php->iph_gen++;
3040 		ipsec_update_present_flags();
3041 	}
3042 
3043 	rw_exit(&php->iph_lock);
3044 
3045 	return (found ? 0 : ENOENT);
3046 }
3047 
3048 /*
3049  * Given a constructed ipsec_policy_t policy rule, see if it can be entered
3050  * into the correct policy ruleset.
3051  *
3052  * Returns B_TRUE if it can be entered, B_FALSE if it can't be (because a
3053  * duplicate policy exists with exactly the same selectors), or an icmp
3054  * rule exists with a different encryption/authentication action.
3055  */
3056 boolean_t
3057 ipsec_check_policy(ipsec_policy_head_t *php, ipsec_policy_t *ipp, int direction)
3058 {
3059 	ipsec_policy_root_t *pr = &php->iph_root[direction];
3060 	int af = -1;
3061 	ipsec_policy_t *p2, *head;
3062 	uint8_t check_proto;
3063 	ipsec_selkey_t *selkey = &ipp->ipsp_sel->ipsl_key;
3064 	uint32_t	valid = selkey->ipsl_valid;
3065 
3066 	if (valid & IPSL_IPV6) {
3067 		ASSERT(!(valid & IPSL_IPV4));
3068 		af = IPSEC_AF_V6;
3069 		check_proto = IPPROTO_ICMPV6;
3070 	} else {
3071 		ASSERT(valid & IPSL_IPV4);
3072 		af = IPSEC_AF_V4;
3073 		check_proto = IPPROTO_ICMP;
3074 	}
3075 
3076 	ASSERT(RW_WRITE_HELD(&php->iph_lock));
3077 
3078 	/*
3079 	 * Double-check that we don't have any duplicate selectors here.
3080 	 * Because selectors are interned below, we need only compare pointers
3081 	 * for equality.
3082 	 */
3083 	if (selkey->ipsl_hval == IPSEC_SEL_NOHASH) {
3084 		head = pr->ipr_nonhash[af];
3085 	} else {
3086 		head = pr->ipr_hash[selkey->ipsl_hval].hash_head;
3087 	}
3088 
3089 	for (p2 = head; p2 != NULL; p2 = p2->ipsp_hash.hash_next) {
3090 		if (p2->ipsp_sel == ipp->ipsp_sel)
3091 			return (B_FALSE);
3092 	}
3093 
3094 	/*
3095 	 * If it's ICMP and not a drop or pass rule, run through the ICMP
3096 	 * rules and make sure the action is either new or the same as any
3097 	 * other actions.  We don't have to check the full chain because
3098 	 * discard and bypass will override all other actions
3099 	 */
3100 
3101 	if (valid & IPSL_PROTOCOL &&
3102 	    selkey->ipsl_proto == check_proto &&
3103 	    (ipp->ipsp_act->ipa_act.ipa_type == IPSEC_ACT_APPLY)) {
3104 
3105 		for (p2 = head; p2 != NULL; p2 = p2->ipsp_hash.hash_next) {
3106 
3107 			if (p2->ipsp_sel->ipsl_key.ipsl_valid & IPSL_PROTOCOL &&
3108 			    p2->ipsp_sel->ipsl_key.ipsl_proto == check_proto &&
3109 			    (p2->ipsp_act->ipa_act.ipa_type ==
3110 				IPSEC_ACT_APPLY)) {
3111 				return (ipsec_compare_action(p2, ipp));
3112 			}
3113 		}
3114 	}
3115 
3116 	return (B_TRUE);
3117 }
3118 
3119 /*
3120  * compare the action chains of two policies for equality
3121  * B_TRUE -> effective equality
3122  */
3123 
3124 static boolean_t
3125 ipsec_compare_action(ipsec_policy_t *p1, ipsec_policy_t *p2)
3126 {
3127 
3128 	ipsec_action_t *act1, *act2;
3129 
3130 	/* We have a valid rule. Let's compare the actions */
3131 	if (p1->ipsp_act == p2->ipsp_act) {
3132 		/* same action. We are good */
3133 		return (B_TRUE);
3134 	}
3135 
3136 	/* we have to walk the chain */
3137 
3138 	act1 = p1->ipsp_act;
3139 	act2 = p2->ipsp_act;
3140 
3141 	while (act1 != NULL && act2 != NULL) {
3142 
3143 		/* otherwise, Are we close enough? */
3144 		if (act1->ipa_allow_clear != act2->ipa_allow_clear ||
3145 		    act1->ipa_want_ah != act2->ipa_want_ah ||
3146 		    act1->ipa_want_esp != act2->ipa_want_esp ||
3147 		    act1->ipa_want_se != act2->ipa_want_se) {
3148 			/* Nope, we aren't */
3149 			return (B_FALSE);
3150 		}
3151 
3152 		if (act1->ipa_want_ah) {
3153 			if (act1->ipa_act.ipa_apply.ipp_auth_alg !=
3154 			    act2->ipa_act.ipa_apply.ipp_auth_alg) {
3155 				return (B_FALSE);
3156 			}
3157 
3158 			if (act1->ipa_act.ipa_apply.ipp_ah_minbits !=
3159 			    act2->ipa_act.ipa_apply.ipp_ah_minbits ||
3160 			    act1->ipa_act.ipa_apply.ipp_ah_maxbits !=
3161 			    act2->ipa_act.ipa_apply.ipp_ah_maxbits) {
3162 				return (B_FALSE);
3163 			}
3164 		}
3165 
3166 		if (act1->ipa_want_esp) {
3167 			if (act1->ipa_act.ipa_apply.ipp_use_esp !=
3168 			    act2->ipa_act.ipa_apply.ipp_use_esp ||
3169 			    act1->ipa_act.ipa_apply.ipp_use_espa !=
3170 			    act2->ipa_act.ipa_apply.ipp_use_espa) {
3171 				return (B_FALSE);
3172 			}
3173 
3174 			if (act1->ipa_act.ipa_apply.ipp_use_esp) {
3175 				if (act1->ipa_act.ipa_apply.ipp_encr_alg !=
3176 				    act2->ipa_act.ipa_apply.ipp_encr_alg) {
3177 					return (B_FALSE);
3178 				}
3179 
3180 				if (act1->ipa_act.ipa_apply.ipp_espe_minbits !=
3181 				    act2->ipa_act.ipa_apply.ipp_espe_minbits ||
3182 				    act1->ipa_act.ipa_apply.ipp_espe_maxbits !=
3183 				    act2->ipa_act.ipa_apply.ipp_espe_maxbits) {
3184 					return (B_FALSE);
3185 				}
3186 			}
3187 
3188 			if (act1->ipa_act.ipa_apply.ipp_use_espa) {
3189 				if (act1->ipa_act.ipa_apply.ipp_esp_auth_alg !=
3190 				    act2->ipa_act.ipa_apply.ipp_esp_auth_alg) {
3191 					return (B_FALSE);
3192 				}
3193 
3194 				if (act1->ipa_act.ipa_apply.ipp_espa_minbits !=
3195 				    act2->ipa_act.ipa_apply.ipp_espa_minbits ||
3196 				    act1->ipa_act.ipa_apply.ipp_espa_maxbits !=
3197 				    act2->ipa_act.ipa_apply.ipp_espa_maxbits) {
3198 					return (B_FALSE);
3199 				}
3200 			}
3201 
3202 		}
3203 
3204 		act1 = act1->ipa_next;
3205 		act2 = act2->ipa_next;
3206 	}
3207 
3208 	if (act1 != NULL || act2 != NULL) {
3209 		return (B_FALSE);
3210 	}
3211 
3212 	return (B_TRUE);
3213 }
3214 
3215 
3216 /*
3217  * Given a constructed ipsec_policy_t policy rule, enter it into
3218  * the correct policy ruleset.
3219  *
3220  * ipsec_check_policy() is assumed to have succeeded first (to check for
3221  * duplicates).
3222  */
3223 void
3224 ipsec_enter_policy(ipsec_policy_head_t *php, ipsec_policy_t *ipp, int direction)
3225 {
3226 	ipsec_policy_root_t *pr = &php->iph_root[direction];
3227 	ipsec_selkey_t *selkey = &ipp->ipsp_sel->ipsl_key;
3228 	uint32_t valid = selkey->ipsl_valid;
3229 	uint32_t hval = selkey->ipsl_hval;
3230 	int af = -1;
3231 
3232 	ASSERT(RW_WRITE_HELD(&php->iph_lock));
3233 
3234 	if (valid & IPSL_IPV6) {
3235 		ASSERT(!(valid & IPSL_IPV4));
3236 		af = IPSEC_AF_V6;
3237 	} else {
3238 		ASSERT(valid & IPSL_IPV4);
3239 		af = IPSEC_AF_V4;
3240 	}
3241 
3242 	php->iph_gen++;
3243 
3244 	if (hval == IPSEC_SEL_NOHASH) {
3245 		HASHLIST_INSERT(ipp, ipsp_hash, pr->ipr_nonhash[af]);
3246 	} else {
3247 		HASH_LOCK(pr->ipr_hash, hval);
3248 		HASH_INSERT(ipp, ipsp_hash, pr->ipr_hash, hval);
3249 		HASH_UNLOCK(pr->ipr_hash, hval);
3250 	}
3251 
3252 	ipsec_insert_always(&php->iph_rulebyid, ipp);
3253 
3254 	ipsec_update_present_flags();
3255 }
3256 
3257 static void
3258 ipsec_ipr_flush(ipsec_policy_head_t *php, ipsec_policy_root_t *ipr)
3259 {
3260 	ipsec_policy_t *ip, *nip;
3261 
3262 	int af, chain, nchain;
3263 
3264 	for (af = 0; af < IPSEC_NAF; af++) {
3265 		for (ip = ipr->ipr_nonhash[af]; ip != NULL; ip = nip) {
3266 			nip = ip->ipsp_hash.hash_next;
3267 			IPPOL_UNCHAIN(php, ip);
3268 		}
3269 		ipr->ipr_nonhash[af] = NULL;
3270 	}
3271 	nchain = ipr->ipr_nchains;
3272 
3273 	for (chain = 0; chain < nchain; chain++) {
3274 		for (ip = ipr->ipr_hash[chain].hash_head; ip != NULL;
3275 		    ip = nip) {
3276 			nip = ip->ipsp_hash.hash_next;
3277 			IPPOL_UNCHAIN(php, ip);
3278 		}
3279 		ipr->ipr_hash[chain].hash_head = NULL;
3280 	}
3281 }
3282 
3283 
3284 void
3285 ipsec_polhead_flush(ipsec_policy_head_t *php)
3286 {
3287 	int dir;
3288 
3289 	ASSERT(RW_WRITE_HELD(&php->iph_lock));
3290 
3291 	for (dir = 0; dir < IPSEC_NTYPES; dir++)
3292 		ipsec_ipr_flush(php, &php->iph_root[dir]);
3293 
3294 	ipsec_update_present_flags();
3295 }
3296 
3297 void
3298 ipsec_polhead_free(ipsec_policy_head_t *php)
3299 {
3300 	ASSERT(php->iph_refs == 0);
3301 	rw_enter(&php->iph_lock, RW_WRITER);
3302 	ipsec_polhead_flush(php);
3303 	rw_exit(&php->iph_lock);
3304 	rw_destroy(&php->iph_lock);
3305 	kmem_free(php, sizeof (*php));
3306 }
3307 
3308 static void
3309 ipsec_ipr_init(ipsec_policy_root_t *ipr)
3310 {
3311 	int af;
3312 
3313 	ipr->ipr_nchains = 0;
3314 	ipr->ipr_hash = NULL;
3315 
3316 	for (af = 0; af < IPSEC_NAF; af++) {
3317 		ipr->ipr_nonhash[af] = NULL;
3318 	}
3319 }
3320 
3321 extern ipsec_policy_head_t *
3322 ipsec_polhead_create(void)
3323 {
3324 	ipsec_policy_head_t *php;
3325 
3326 	php = kmem_alloc(sizeof (*php), KM_NOSLEEP);
3327 	if (php == NULL)
3328 		return (php);
3329 
3330 	rw_init(&php->iph_lock, NULL, RW_DEFAULT, NULL);
3331 	php->iph_refs = 1;
3332 	php->iph_gen = 0;
3333 
3334 	ipsec_ipr_init(&php->iph_root[IPSEC_TYPE_INBOUND]);
3335 	ipsec_ipr_init(&php->iph_root[IPSEC_TYPE_OUTBOUND]);
3336 
3337 	avl_create(&php->iph_rulebyid, ipsec_policy_cmpbyid,
3338 	    sizeof (ipsec_policy_t), offsetof(ipsec_policy_t, ipsp_byid));
3339 
3340 	return (php);
3341 }
3342 
3343 /*
3344  * Clone the policy head into a new polhead; release one reference to the
3345  * old one and return the only reference to the new one.
3346  * If the old one had a refcount of 1, just return it.
3347  */
3348 extern ipsec_policy_head_t *
3349 ipsec_polhead_split(ipsec_policy_head_t *php)
3350 {
3351 	ipsec_policy_head_t *nphp;
3352 
3353 	if (php == NULL)
3354 		return (ipsec_polhead_create());
3355 	else if (php->iph_refs == 1)
3356 		return (php);
3357 
3358 	nphp = ipsec_polhead_create();
3359 	if (nphp == NULL)
3360 		return (NULL);
3361 
3362 	if (ipsec_copy_polhead(php, nphp) != 0) {
3363 		ipsec_polhead_free(nphp);
3364 		return (NULL);
3365 	}
3366 	IPPH_REFRELE(php);
3367 	return (nphp);
3368 }
3369 
3370 /*
3371  * When sending a response to a ICMP request or generating a RST
3372  * in the TCP case, the outbound packets need to go at the same level
3373  * of protection as the incoming ones i.e we associate our outbound
3374  * policy with how the packet came in. We call this after we have
3375  * accepted the incoming packet which may or may not have been in
3376  * clear and hence we are sending the reply back with the policy
3377  * matching the incoming datagram's policy.
3378  *
3379  * NOTE : This technology serves two purposes :
3380  *
3381  * 1) If we have multiple outbound policies, we send out a reply
3382  *    matching with how it came in rather than matching the outbound
3383  *    policy.
3384  *
3385  * 2) For assymetric policies, we want to make sure that incoming
3386  *    and outgoing has the same level of protection. Assymetric
3387  *    policies exist only with global policy where we may not have
3388  *    both outbound and inbound at the same time.
3389  *
3390  * NOTE2:	This function is called by cleartext cases, so it needs to be
3391  *		in IP proper.
3392  */
3393 boolean_t
3394 ipsec_in_to_out(mblk_t *ipsec_mp, ipha_t *ipha, ip6_t *ip6h)
3395 {
3396 	ipsec_in_t  *ii;
3397 	ipsec_out_t  *io;
3398 	boolean_t v4;
3399 	mblk_t *mp;
3400 	boolean_t secure, attach_if;
3401 	uint_t ifindex;
3402 	ipsec_selector_t sel;
3403 	ipsec_action_t *reflect_action = NULL;
3404 	zoneid_t zoneid;
3405 
3406 	ASSERT(ipsec_mp->b_datap->db_type == M_CTL);
3407 
3408 	bzero((void*)&sel, sizeof (sel));
3409 
3410 	ii = (ipsec_in_t *)ipsec_mp->b_rptr;
3411 
3412 	mp = ipsec_mp->b_cont;
3413 	ASSERT(mp != NULL);
3414 
3415 	if (ii->ipsec_in_action != NULL) {
3416 		/* transfer reference.. */
3417 		reflect_action = ii->ipsec_in_action;
3418 		ii->ipsec_in_action = NULL;
3419 	} else if (!ii->ipsec_in_loopback)
3420 		reflect_action = ipsec_in_to_out_action(ii);
3421 	secure = ii->ipsec_in_secure;
3422 	attach_if = ii->ipsec_in_attach_if;
3423 	ifindex = ii->ipsec_in_ill_index;
3424 	zoneid = ii->ipsec_in_zoneid;
3425 	v4 = ii->ipsec_in_v4;
3426 
3427 	ipsec_in_release_refs(ii);
3428 
3429 	/*
3430 	 * The caller is going to send the datagram out which might
3431 	 * go on the wire or delivered locally through ip_wput_local.
3432 	 *
3433 	 * 1) If it goes out on the wire, new associations will be
3434 	 *    obtained.
3435 	 * 2) If it is delivered locally, ip_wput_local will convert
3436 	 *    this IPSEC_OUT to a IPSEC_IN looking at the requests.
3437 	 */
3438 
3439 	io = (ipsec_out_t *)ipsec_mp->b_rptr;
3440 	bzero(io, sizeof (ipsec_out_t));
3441 	io->ipsec_out_type = IPSEC_OUT;
3442 	io->ipsec_out_len = sizeof (ipsec_out_t);
3443 	io->ipsec_out_frtn.free_func = ipsec_out_free;
3444 	io->ipsec_out_frtn.free_arg = (char *)io;
3445 	io->ipsec_out_act = reflect_action;
3446 
3447 	if (!ipsec_init_outbound_ports(&sel, mp, ipha, ip6h))
3448 		return (B_FALSE);
3449 
3450 	io->ipsec_out_src_port = sel.ips_local_port;
3451 	io->ipsec_out_dst_port = sel.ips_remote_port;
3452 	io->ipsec_out_proto = sel.ips_protocol;
3453 	io->ipsec_out_icmp_type = sel.ips_icmp_type;
3454 	io->ipsec_out_icmp_code = sel.ips_icmp_code;
3455 
3456 	/*
3457 	 * Don't use global policy for this, as we want
3458 	 * to use the same protection that was applied to the inbound packet.
3459 	 */
3460 	io->ipsec_out_use_global_policy = B_FALSE;
3461 	io->ipsec_out_proc_begin = B_FALSE;
3462 	io->ipsec_out_secure = secure;
3463 	io->ipsec_out_v4 = v4;
3464 	io->ipsec_out_attach_if = attach_if;
3465 	io->ipsec_out_ill_index = ifindex;
3466 	io->ipsec_out_zoneid = zoneid;
3467 	return (B_TRUE);
3468 }
3469 
3470 mblk_t *
3471 ipsec_in_tag(mblk_t *mp, mblk_t *cont)
3472 {
3473 	ipsec_in_t *ii = (ipsec_in_t *)mp->b_rptr;
3474 	ipsec_in_t *nii;
3475 	mblk_t *nmp;
3476 	frtn_t nfrtn;
3477 
3478 	ASSERT(ii->ipsec_in_type == IPSEC_IN);
3479 	ASSERT(ii->ipsec_in_len == sizeof (ipsec_in_t));
3480 
3481 	nmp = ipsec_in_alloc(ii->ipsec_in_v4);
3482 
3483 	ASSERT(nmp->b_datap->db_type == M_CTL);
3484 	ASSERT(nmp->b_wptr == (nmp->b_rptr + sizeof (ipsec_info_t)));
3485 
3486 	/*
3487 	 * Bump refcounts.
3488 	 */
3489 	if (ii->ipsec_in_ah_sa != NULL)
3490 		IPSA_REFHOLD(ii->ipsec_in_ah_sa);
3491 	if (ii->ipsec_in_esp_sa != NULL)
3492 		IPSA_REFHOLD(ii->ipsec_in_esp_sa);
3493 	if (ii->ipsec_in_policy != NULL)
3494 		IPPH_REFHOLD(ii->ipsec_in_policy);
3495 
3496 	/*
3497 	 * Copy everything, but preserve the free routine provided by
3498 	 * ipsec_in_alloc().
3499 	 */
3500 	nii = (ipsec_in_t *)nmp->b_rptr;
3501 	nfrtn = nii->ipsec_in_frtn;
3502 	bcopy(ii, nii, sizeof (*ii));
3503 	nii->ipsec_in_frtn = nfrtn;
3504 
3505 	nmp->b_cont = cont;
3506 
3507 	return (nmp);
3508 }
3509 
3510 mblk_t *
3511 ipsec_out_tag(mblk_t *mp, mblk_t *cont)
3512 {
3513 	ipsec_out_t *io = (ipsec_out_t *)mp->b_rptr;
3514 	ipsec_out_t *nio;
3515 	mblk_t *nmp;
3516 	frtn_t nfrtn;
3517 
3518 	ASSERT(io->ipsec_out_type == IPSEC_OUT);
3519 	ASSERT(io->ipsec_out_len == sizeof (ipsec_out_t));
3520 
3521 	nmp = ipsec_alloc_ipsec_out();
3522 	if (nmp == NULL) {
3523 		freemsg(cont);	/* XXX ip_drop_packet() ? */
3524 		return (NULL);
3525 	}
3526 	ASSERT(nmp->b_datap->db_type == M_CTL);
3527 	ASSERT(nmp->b_wptr == (nmp->b_rptr + sizeof (ipsec_info_t)));
3528 
3529 	/*
3530 	 * Bump refcounts.
3531 	 */
3532 	if (io->ipsec_out_ah_sa != NULL)
3533 		IPSA_REFHOLD(io->ipsec_out_ah_sa);
3534 	if (io->ipsec_out_esp_sa != NULL)
3535 		IPSA_REFHOLD(io->ipsec_out_esp_sa);
3536 	if (io->ipsec_out_polhead != NULL)
3537 		IPPH_REFHOLD(io->ipsec_out_polhead);
3538 	if (io->ipsec_out_policy != NULL)
3539 		IPPOL_REFHOLD(io->ipsec_out_policy);
3540 	if (io->ipsec_out_act != NULL)
3541 		IPACT_REFHOLD(io->ipsec_out_act);
3542 	if (io->ipsec_out_latch != NULL)
3543 		IPLATCH_REFHOLD(io->ipsec_out_latch);
3544 	if (io->ipsec_out_cred != NULL)
3545 		crhold(io->ipsec_out_cred);
3546 
3547 	/*
3548 	 * Copy everything, but preserve the free routine provided by
3549 	 * ipsec_alloc_ipsec_out().
3550 	 */
3551 	nio = (ipsec_out_t *)nmp->b_rptr;
3552 	nfrtn = nio->ipsec_out_frtn;
3553 	bcopy(io, nio, sizeof (*io));
3554 	nio->ipsec_out_frtn = nfrtn;
3555 
3556 	nmp->b_cont = cont;
3557 
3558 	return (nmp);
3559 }
3560 
3561 static void
3562 ipsec_out_release_refs(ipsec_out_t *io)
3563 {
3564 	ASSERT(io->ipsec_out_type == IPSEC_OUT);
3565 	ASSERT(io->ipsec_out_len == sizeof (ipsec_out_t));
3566 
3567 	/* Note: IPSA_REFRELE is multi-line macro */
3568 	if (io->ipsec_out_ah_sa != NULL)
3569 		IPSA_REFRELE(io->ipsec_out_ah_sa);
3570 	if (io->ipsec_out_esp_sa != NULL)
3571 		IPSA_REFRELE(io->ipsec_out_esp_sa);
3572 	if (io->ipsec_out_polhead != NULL)
3573 		IPPH_REFRELE(io->ipsec_out_polhead);
3574 	if (io->ipsec_out_policy != NULL)
3575 		IPPOL_REFRELE(io->ipsec_out_policy);
3576 	if (io->ipsec_out_act != NULL)
3577 		IPACT_REFRELE(io->ipsec_out_act);
3578 	if (io->ipsec_out_cred != NULL) {
3579 		crfree(io->ipsec_out_cred);
3580 		io->ipsec_out_cred = NULL;
3581 	}
3582 	if (io->ipsec_out_latch) {
3583 		IPLATCH_REFRELE(io->ipsec_out_latch);
3584 		io->ipsec_out_latch = NULL;
3585 	}
3586 }
3587 
3588 static void
3589 ipsec_out_free(void *arg)
3590 {
3591 	ipsec_out_t *io = (ipsec_out_t *)arg;
3592 	ipsec_out_release_refs(io);
3593 	kmem_cache_free(ipsec_info_cache, arg);
3594 }
3595 
3596 static void
3597 ipsec_in_release_refs(ipsec_in_t *ii)
3598 {
3599 	/* Note: IPSA_REFRELE is multi-line macro */
3600 	if (ii->ipsec_in_ah_sa != NULL)
3601 		IPSA_REFRELE(ii->ipsec_in_ah_sa);
3602 	if (ii->ipsec_in_esp_sa != NULL)
3603 		IPSA_REFRELE(ii->ipsec_in_esp_sa);
3604 	if (ii->ipsec_in_policy != NULL)
3605 		IPPH_REFRELE(ii->ipsec_in_policy);
3606 	if (ii->ipsec_in_da != NULL) {
3607 		freeb(ii->ipsec_in_da);
3608 		ii->ipsec_in_da = NULL;
3609 	}
3610 }
3611 
3612 static void
3613 ipsec_in_free(void *arg)
3614 {
3615 	ipsec_in_t *ii = (ipsec_in_t *)arg;
3616 	ipsec_in_release_refs(ii);
3617 	kmem_cache_free(ipsec_info_cache, arg);
3618 }
3619 
3620 /*
3621  * This is called only for outbound datagrams if the datagram needs to
3622  * go out secure.  A NULL mp can be passed to get an ipsec_out. This
3623  * facility is used by ip_unbind.
3624  *
3625  * NOTE : o As the data part could be modified by ipsec_out_process etc.
3626  *	    we can't make it fast by calling a dup.
3627  */
3628 mblk_t *
3629 ipsec_alloc_ipsec_out()
3630 {
3631 	mblk_t *ipsec_mp;
3632 
3633 	ipsec_out_t *io = kmem_cache_alloc(ipsec_info_cache, KM_NOSLEEP);
3634 
3635 	if (io == NULL)
3636 		return (NULL);
3637 
3638 	bzero(io, sizeof (ipsec_out_t));
3639 
3640 	io->ipsec_out_type = IPSEC_OUT;
3641 	io->ipsec_out_len = sizeof (ipsec_out_t);
3642 	io->ipsec_out_frtn.free_func = ipsec_out_free;
3643 	io->ipsec_out_frtn.free_arg = (char *)io;
3644 
3645 	/*
3646 	 * Set the zoneid to ALL_ZONES which is used as an invalid value. Code
3647 	 * using ipsec_out_zoneid should assert that the zoneid has been set to
3648 	 * a sane value.
3649 	 */
3650 	io->ipsec_out_zoneid = ALL_ZONES;
3651 
3652 	ipsec_mp = desballoc((uint8_t *)io, sizeof (ipsec_info_t), BPRI_HI,
3653 	    &io->ipsec_out_frtn);
3654 	if (ipsec_mp == NULL) {
3655 		ipsec_out_free(io);
3656 
3657 		return (NULL);
3658 	}
3659 	ipsec_mp->b_datap->db_type = M_CTL;
3660 	ipsec_mp->b_wptr = ipsec_mp->b_rptr + sizeof (ipsec_info_t);
3661 
3662 	return (ipsec_mp);
3663 }
3664 
3665 /*
3666  * Attach an IPSEC_OUT; use pol for policy if it is non-null.
3667  * Otherwise initialize using conn.
3668  *
3669  * If pol is non-null, we consume a reference to it.
3670  */
3671 mblk_t *
3672 ipsec_attach_ipsec_out(mblk_t *mp, conn_t *connp, ipsec_policy_t *pol,
3673     uint8_t proto)
3674 {
3675 	mblk_t *ipsec_mp;
3676 
3677 	ASSERT((pol != NULL) || (connp != NULL));
3678 
3679 	ipsec_mp = ipsec_alloc_ipsec_out();
3680 	if (ipsec_mp == NULL) {
3681 		(void) mi_strlog(CONNP_TO_WQ(connp), 0, SL_ERROR|SL_NOTE,
3682 		    "ipsec_attach_ipsec_out: Allocation failure\n");
3683 		BUMP_MIB(&ip_mib, ipOutDiscards);
3684 		ip_drop_packet(mp, B_FALSE, NULL, NULL, &ipdrops_spd_nomem,
3685 		    &spd_dropper);
3686 		return (NULL);
3687 	}
3688 	ipsec_mp->b_cont = mp;
3689 	return (ipsec_init_ipsec_out(ipsec_mp, connp, pol, proto));
3690 }
3691 
3692 /*
3693  * Initialize the IPSEC_OUT (ipsec_mp) using pol if it is non-null.
3694  * Otherwise initialize using conn.
3695  *
3696  * If pol is non-null, we consume a reference to it.
3697  */
3698 mblk_t *
3699 ipsec_init_ipsec_out(mblk_t *ipsec_mp, conn_t *connp, ipsec_policy_t *pol,
3700     uint8_t proto)
3701 {
3702 	mblk_t *mp;
3703 	ipsec_out_t *io;
3704 	ipsec_policy_t *p;
3705 	ipha_t *ipha;
3706 	ip6_t *ip6h;
3707 
3708 	ASSERT((pol != NULL) || (connp != NULL));
3709 
3710 	/*
3711 	 * If mp is NULL, we won't/should not be using it.
3712 	 */
3713 	mp = ipsec_mp->b_cont;
3714 
3715 	ASSERT(ipsec_mp->b_datap->db_type == M_CTL);
3716 	ASSERT(ipsec_mp->b_wptr == (ipsec_mp->b_rptr + sizeof (ipsec_info_t)));
3717 	io = (ipsec_out_t *)ipsec_mp->b_rptr;
3718 	ASSERT(io->ipsec_out_type == IPSEC_OUT);
3719 	ASSERT(io->ipsec_out_len == sizeof (ipsec_out_t));
3720 	io->ipsec_out_latch = NULL;
3721 	/*
3722 	 * Set the zoneid when we have the connp.
3723 	 * Otherwise, we're called from ip_wput_attach_policy() who will take
3724 	 * care of setting the zoneid.
3725 	 */
3726 	if (connp != NULL)
3727 		io->ipsec_out_zoneid = connp->conn_zoneid;
3728 
3729 	if (mp != NULL) {
3730 		ipha = (ipha_t *)mp->b_rptr;
3731 		if (IPH_HDR_VERSION(ipha) == IP_VERSION) {
3732 			io->ipsec_out_v4 = B_TRUE;
3733 			ip6h = NULL;
3734 		} else {
3735 			io->ipsec_out_v4 = B_FALSE;
3736 			ip6h = (ip6_t *)ipha;
3737 			ipha = NULL;
3738 		}
3739 	} else {
3740 		ASSERT(connp != NULL && connp->conn_policy_cached);
3741 		ip6h = NULL;
3742 		ipha = NULL;
3743 		io->ipsec_out_v4 = !connp->conn_pkt_isv6;
3744 	}
3745 
3746 	p = NULL;
3747 
3748 	/*
3749 	 * Take latched policies over global policy.  Check here again for
3750 	 * this, in case we had conn_latch set while the packet was flying
3751 	 * around in IP.
3752 	 */
3753 	if (connp != NULL && connp->conn_latch != NULL) {
3754 		p = connp->conn_latch->ipl_out_policy;
3755 		io->ipsec_out_latch = connp->conn_latch;
3756 		IPLATCH_REFHOLD(connp->conn_latch);
3757 		if (p != NULL) {
3758 			IPPOL_REFHOLD(p);
3759 		}
3760 		io->ipsec_out_src_port = connp->conn_lport;
3761 		io->ipsec_out_dst_port = connp->conn_fport;
3762 		io->ipsec_out_icmp_type = io->ipsec_out_icmp_code = 0;
3763 		if (pol != NULL)
3764 			IPPOL_REFRELE(pol);
3765 	} else if (pol != NULL) {
3766 		ipsec_selector_t sel;
3767 
3768 		bzero((void*)&sel, sizeof (sel));
3769 
3770 		p = pol;
3771 		/*
3772 		 * conn does not have the port information. Get
3773 		 * it from the packet.
3774 		 */
3775 
3776 		if (!ipsec_init_outbound_ports(&sel, mp, ipha, ip6h)) {
3777 			/* XXX any cleanup required here?? */
3778 			return (NULL);
3779 		}
3780 		io->ipsec_out_src_port = sel.ips_local_port;
3781 		io->ipsec_out_dst_port = sel.ips_remote_port;
3782 		io->ipsec_out_icmp_type = sel.ips_icmp_type;
3783 		io->ipsec_out_icmp_code = sel.ips_icmp_code;
3784 	}
3785 
3786 	io->ipsec_out_proto = proto;
3787 	io->ipsec_out_use_global_policy = B_TRUE;
3788 	io->ipsec_out_secure = (p != NULL);
3789 	io->ipsec_out_policy = p;
3790 
3791 	if (p == NULL) {
3792 		if (connp->conn_policy != NULL) {
3793 			io->ipsec_out_secure = B_TRUE;
3794 			ASSERT(io->ipsec_out_latch == NULL);
3795 			ASSERT(io->ipsec_out_use_global_policy == B_TRUE);
3796 			io->ipsec_out_need_policy = B_TRUE;
3797 			ASSERT(io->ipsec_out_polhead == NULL);
3798 			IPPH_REFHOLD(connp->conn_policy);
3799 			io->ipsec_out_polhead = connp->conn_policy;
3800 		}
3801 	}
3802 	return (ipsec_mp);
3803 }
3804 
3805 /*
3806  * Allocate an IPSEC_IN mblk.  This will be prepended to an inbound datagram
3807  * and keep track of what-if-any IPsec processing will be applied to the
3808  * datagram.
3809  */
3810 mblk_t *
3811 ipsec_in_alloc(boolean_t isv4)
3812 {
3813 	mblk_t *ipsec_in;
3814 	ipsec_in_t *ii = kmem_cache_alloc(ipsec_info_cache, KM_NOSLEEP);
3815 
3816 	if (ii == NULL)
3817 		return (NULL);
3818 
3819 	bzero(ii, sizeof (ipsec_info_t));
3820 	ii->ipsec_in_type = IPSEC_IN;
3821 	ii->ipsec_in_len = sizeof (ipsec_in_t);
3822 
3823 	ii->ipsec_in_v4 = isv4;
3824 	ii->ipsec_in_secure = B_TRUE;
3825 
3826 	ii->ipsec_in_frtn.free_func = ipsec_in_free;
3827 	ii->ipsec_in_frtn.free_arg = (char *)ii;
3828 
3829 	ipsec_in = desballoc((uint8_t *)ii, sizeof (ipsec_info_t), BPRI_HI,
3830 	    &ii->ipsec_in_frtn);
3831 	if (ipsec_in == NULL) {
3832 		ip1dbg(("ipsec_in_alloc: IPSEC_IN allocation failure.\n"));
3833 		ipsec_in_free(ii);
3834 		return (NULL);
3835 	}
3836 
3837 	ipsec_in->b_datap->db_type = M_CTL;
3838 	ipsec_in->b_wptr += sizeof (ipsec_info_t);
3839 
3840 	return (ipsec_in);
3841 }
3842 
3843 /*
3844  * This is called from ip_wput_local when a packet which needs
3845  * security is looped back, to convert the IPSEC_OUT to a IPSEC_IN
3846  * before fanout, where the policy check happens.  In most of the
3847  * cases, IPSEC processing has *never* been done.  There is one case
3848  * (ip_wput_ire_fragmentit -> ip_wput_frag -> icmp_frag_needed) where
3849  * the packet is destined for localhost, IPSEC processing has already
3850  * been done.
3851  *
3852  * Future: This could happen after SA selection has occurred for
3853  * outbound.. which will tell us who the src and dst identities are..
3854  * Then it's just a matter of splicing the ah/esp SA pointers from the
3855  * ipsec_out_t to the ipsec_in_t.
3856  */
3857 void
3858 ipsec_out_to_in(mblk_t *ipsec_mp)
3859 {
3860 	ipsec_in_t  *ii;
3861 	ipsec_out_t *io;
3862 	ipsec_policy_t *pol;
3863 	ipsec_action_t *act;
3864 	boolean_t v4, icmp_loopback;
3865 
3866 	ASSERT(ipsec_mp->b_datap->db_type == M_CTL);
3867 
3868 	io = (ipsec_out_t *)ipsec_mp->b_rptr;
3869 
3870 	v4 = io->ipsec_out_v4;
3871 	icmp_loopback = io->ipsec_out_icmp_loopback;
3872 
3873 	act = io->ipsec_out_act;
3874 	if (act == NULL) {
3875 		pol = io->ipsec_out_policy;
3876 		if (pol != NULL) {
3877 			act = pol->ipsp_act;
3878 			IPACT_REFHOLD(act);
3879 		}
3880 	}
3881 	io->ipsec_out_act = NULL;
3882 
3883 	ipsec_out_release_refs(io);
3884 
3885 	ii = (ipsec_in_t *)ipsec_mp->b_rptr;
3886 	bzero(ii, sizeof (ipsec_in_t));
3887 	ii->ipsec_in_type = IPSEC_IN;
3888 	ii->ipsec_in_len = sizeof (ipsec_in_t);
3889 	ii->ipsec_in_loopback = B_TRUE;
3890 	ii->ipsec_in_frtn.free_func = ipsec_in_free;
3891 	ii->ipsec_in_frtn.free_arg = (char *)ii;
3892 	ii->ipsec_in_action = act;
3893 
3894 	/*
3895 	 * In most of the cases, we can't look at the ipsec_out_XXX_sa
3896 	 * because this never went through IPSEC processing. So, look at
3897 	 * the requests and infer whether it would have gone through
3898 	 * IPSEC processing or not. Initialize the "done" fields with
3899 	 * the requests. The possible values for "done" fields are :
3900 	 *
3901 	 * 1) zero, indicates that a particular preference was never
3902 	 *    requested.
3903 	 * 2) non-zero, indicates that it could be IPSEC_PREF_REQUIRED/
3904 	 *    IPSEC_PREF_NEVER. If IPSEC_REQ_DONE is set, it means that
3905 	 *    IPSEC processing has been completed.
3906 	 */
3907 	ii->ipsec_in_secure = B_TRUE;
3908 	ii->ipsec_in_v4 = v4;
3909 	ii->ipsec_in_icmp_loopback = icmp_loopback;
3910 	ii->ipsec_in_attach_if = B_FALSE;
3911 }
3912 
/*
 * Consults global policy to see whether this datagram should
 * go out secure. If so it attaches a ipsec_mp in front and
 * returns.
 *
 * ipsec_mp is either the bare datagram or an M_CTL (ipsec_out_t) with the
 * datagram in b_cont.  Exactly one of ipha and ip6h is non-NULL and selects
 * IPv4 or IPv6 handling.  ire supplies the local address when the header
 * does not carry one, and the zoneid when connp is NULL.  For IPv6, a true
 * unspec_src leaves the local selector address zeroed when the header's
 * source address is unspecified.
 *
 * Returns the message to send (with or without a leading M_CTL), or NULL
 * if the packet was dropped here.
 */
mblk_t *
ip_wput_attach_policy(mblk_t *ipsec_mp, ipha_t *ipha, ip6_t *ip6h, ire_t *ire,
    conn_t *connp, boolean_t unspec_src)
{
	mblk_t *mp;
	ipsec_out_t *io = NULL;
	ipsec_selector_t sel;
	uint_t	ill_index;
	boolean_t conn_dontroutex;
	boolean_t conn_multicast_loopx;
	boolean_t policy_present;

	ASSERT((ipha != NULL && ip6h == NULL) ||
	    (ip6h != NULL && ipha == NULL));

	bzero((void*)&sel, sizeof (sel));

	if (ipha != NULL)
		policy_present = ipsec_outbound_v4_policy_present;
	else
		policy_present = ipsec_outbound_v6_policy_present;
	/*
	 * Fast Path to see if there is any policy.
	 */
	if (!policy_present) {
		if (ipsec_mp->b_datap->db_type == M_CTL) {
			io = (ipsec_out_t *)ipsec_mp->b_rptr;
			if (!io->ipsec_out_secure) {
				/*
				 * If there is no global policy and ip_wput
				 * or ip_wput_multicast has attached this mp
				 * for multicast case, free the ipsec_mp and
				 * return the original mp.
				 */
				mp = ipsec_mp->b_cont;
				freeb(ipsec_mp);
				ipsec_mp = mp;
				io = NULL;
			}
		}
		/*
		 * Nothing to do unless the IPSEC_OUT carries a policy head
		 * or the connection has per-socket policy.
		 */
		if (((io == NULL) || (io->ipsec_out_polhead == NULL)) &&
		    ((connp == NULL) || (connp->conn_policy == NULL)))
			return (ipsec_mp);
	}

	ill_index = 0;
	conn_multicast_loopx = conn_dontroutex = B_FALSE;
	mp = ipsec_mp;
	if (ipsec_mp->b_datap->db_type == M_CTL) {
		mp = ipsec_mp->b_cont;
		/*
		 * This is a connection where we have some per-socket
		 * policy or ip_wput has attached an ipsec_mp for
		 * the multicast datagram.
		 */
		io = (ipsec_out_t *)ipsec_mp->b_rptr;
		if (!io->ipsec_out_secure) {
			/*
			 * This ipsec_mp was allocated in ip_wput or
			 * ip_wput_multicast so that we will know the
			 * value of ill_index, conn_dontroute,
			 * conn_multicast_loop in the multicast case if
			 * we inherit global policy here.
			 */
			ill_index = io->ipsec_out_ill_index;
			conn_dontroutex = io->ipsec_out_dontroute;
			conn_multicast_loopx = io->ipsec_out_multicast_loop;
			freeb(ipsec_mp);
			ipsec_mp = mp;
			io = NULL;
		}
	}

	/* Fill in the address and protocol parts of the selector. */
	if (ipha != NULL) {
		sel.ips_local_addr_v4 = (ipha->ipha_src != 0 ?
		    ipha->ipha_src : ire->ire_src_addr);
		sel.ips_remote_addr_v4 = ip_get_dst(ipha);
		sel.ips_protocol = (uint8_t)ipha->ipha_protocol;
		sel.ips_isv4 = B_TRUE;
	} else {
		ushort_t hdr_len;
		uint8_t	*nexthdrp;
		boolean_t is_fragment;

		sel.ips_isv4 = B_FALSE;
		if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) {
			if (!unspec_src)
				sel.ips_local_addr_v6 = ire->ire_src_addr_v6;
		} else {
			sel.ips_local_addr_v6 = ip6h->ip6_src;
		}

		sel.ips_remote_addr_v6 = ip_get_dst_v6(ip6h, &is_fragment);
		if (is_fragment) {
			/*
			 * It's a packet fragment for a packet that
			 * we have already processed (since IPsec processing
			 * is done before fragmentation), so we don't
			 * have to do policy checks again. Fragments can
			 * come back to us for processing if they have
			 * been queued up due to flow control.
			 */
			if (ipsec_mp->b_datap->db_type == M_CTL) {
				mp = ipsec_mp->b_cont;
				freeb(ipsec_mp);
				ipsec_mp = mp;
			}
			return (ipsec_mp);
		}

		/* IPv6 common-case. */
		sel.ips_protocol = ip6h->ip6_nxt;
		switch (ip6h->ip6_nxt) {
		case IPPROTO_TCP:
		case IPPROTO_UDP:
		case IPPROTO_SCTP:
		case IPPROTO_ICMPV6:
			break;
		default:
			/* Other next-header values: look the protocol up. */
			if (!ip_hdr_length_nexthdr_v6(mp, ip6h,
			    &hdr_len, &nexthdrp)) {
				BUMP_MIB(&ip6_mib, ipv6OutDiscards);
				freemsg(ipsec_mp); /* Not IPsec-related drop. */
				return (NULL);
			}
			sel.ips_protocol = *nexthdrp;
			break;
		}
	}

	if (!ipsec_init_outbound_ports(&sel, mp, ipha, ip6h)) {
		if (ipha != NULL) {
			BUMP_MIB(&ip_mib, ipOutDiscards);
		} else {
			BUMP_MIB(&ip6_mib, ipv6OutDiscards);
		}

		ip_drop_packet(ipsec_mp, B_FALSE, NULL, NULL,
		    &ipdrops_spd_nomem, &spd_dropper);
		return (NULL);
	}

	if (io != NULL) {
		/*
		 * We seem to have some local policy (we already have
		 * an ipsec_out).  Look at global policy and see
		 * whether we have to inherit or not.
		 */
		io->ipsec_out_need_policy = B_FALSE;
		ipsec_mp = ipsec_apply_global_policy(ipsec_mp, connp, &sel);
		ASSERT((io->ipsec_out_policy != NULL) ||
		    (io->ipsec_out_act != NULL));
		ASSERT(io->ipsec_out_need_policy == B_FALSE);
		return (ipsec_mp);
	}
	ipsec_mp = ipsec_attach_global_policy(mp, connp, &sel);
	/*
	 * NOTE(review): a NULL return is treated as "nothing to attach" and
	 * the bare datagram is handed back.  ipsec_attach_global_policy() is
	 * not visible here; if it can also return NULL after the datagram
	 * has been dropped (as ipsec_attach_ipsec_out() does on allocation
	 * failure), mp would be stale at this point -- confirm.
	 */
	if (ipsec_mp == NULL)
		return (mp);

	/*
	 * Copy the right port information.
	 */
	ASSERT(ipsec_mp->b_datap->db_type == M_CTL);
	io = (ipsec_out_t *)ipsec_mp->b_rptr;

	ASSERT(io->ipsec_out_need_policy == B_FALSE);
	ASSERT((io->ipsec_out_policy != NULL) ||
	    (io->ipsec_out_act != NULL));
	io->ipsec_out_src_port = sel.ips_local_port;
	io->ipsec_out_dst_port = sel.ips_remote_port;
	io->ipsec_out_icmp_type = sel.ips_icmp_type;
	io->ipsec_out_icmp_code = sel.ips_icmp_code;
	/*
	 * Set ill_index, conn_dontroute and conn_multicast_loop
	 * for multicast datagrams.
	 */
	io->ipsec_out_ill_index = ill_index;
	io->ipsec_out_dontroute = conn_dontroutex;
	io->ipsec_out_multicast_loop = conn_multicast_loopx;
	/*
	 * When conn is non-NULL, the zoneid is set by ipsec_init_ipsec_out().
	 * Otherwise set the zoneid based on the ire.
	 */
	if (connp == NULL)
		io->ipsec_out_zoneid = ire->ire_zoneid;
	return (ipsec_mp);
}
4105 
4106 /*
4107  * When appropriate, this function caches inbound and outbound policy
4108  * for this connection.
4109  *
4110  * XXX need to work out more details about per-interface policy and
4111  * caching here!
4112  *
4113  * XXX may want to split inbound and outbound caching for ill..
4114  */
4115 int
4116 ipsec_conn_cache_policy(conn_t *connp, boolean_t isv4)
4117 {
4118 	boolean_t global_policy_present;
4119 
4120 	/*
4121 	 * There is no policy latching for ICMP sockets because we can't
4122 	 * decide on which policy to use until we see the packet and get
4123 	 * type/code selectors.
4124 	 */
4125 	if (connp->conn_ulp == IPPROTO_ICMP ||
4126 	    connp->conn_ulp == IPPROTO_ICMPV6) {
4127 		connp->conn_in_enforce_policy =
4128 		    connp->conn_out_enforce_policy = B_TRUE;
4129 		if (connp->conn_latch != NULL) {
4130 			IPLATCH_REFRELE(connp->conn_latch);
4131 			connp->conn_latch = NULL;
4132 		}
4133 		connp->conn_flags |= IPCL_CHECK_POLICY;
4134 		return (0);
4135 	}
4136 
4137 	global_policy_present = isv4 ?
4138 	    (ipsec_outbound_v4_policy_present ||
4139 		ipsec_inbound_v4_policy_present) :
4140 	    (ipsec_outbound_v6_policy_present ||
4141 		ipsec_inbound_v6_policy_present);
4142 
4143 	if ((connp->conn_policy != NULL) || global_policy_present) {
4144 		ipsec_selector_t sel;
4145 		ipsec_policy_t	*p;
4146 
4147 		if (connp->conn_latch == NULL &&
4148 		    (connp->conn_latch = iplatch_create()) == NULL) {
4149 			return (ENOMEM);
4150 		}
4151 
4152 		sel.ips_protocol = connp->conn_ulp;
4153 		sel.ips_local_port = connp->conn_lport;
4154 		sel.ips_remote_port = connp->conn_fport;
4155 		sel.ips_is_icmp_inv_acq = 0;
4156 		sel.ips_isv4 = isv4;
4157 		if (isv4) {
4158 			sel.ips_local_addr_v4 = connp->conn_src;
4159 			sel.ips_remote_addr_v4 = connp->conn_rem;
4160 		} else {
4161 			sel.ips_local_addr_v6 = connp->conn_srcv6;
4162 			sel.ips_remote_addr_v6 = connp->conn_remv6;
4163 		}
4164 
4165 		p = ipsec_find_policy(IPSEC_TYPE_INBOUND, connp, NULL, &sel);
4166 		if (connp->conn_latch->ipl_in_policy != NULL)
4167 			IPPOL_REFRELE(connp->conn_latch->ipl_in_policy);
4168 		connp->conn_latch->ipl_in_policy = p;
4169 		connp->conn_in_enforce_policy = (p != NULL);
4170 
4171 		p = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, NULL, &sel);
4172 		if (connp->conn_latch->ipl_out_policy != NULL)
4173 			IPPOL_REFRELE(connp->conn_latch->ipl_out_policy);
4174 		connp->conn_latch->ipl_out_policy = p;
4175 		connp->conn_out_enforce_policy = (p != NULL);
4176 
4177 		/* Clear the latched actions too, in case we're recaching. */
4178 		if (connp->conn_latch->ipl_out_action != NULL)
4179 			IPACT_REFRELE(connp->conn_latch->ipl_out_action);
4180 		if (connp->conn_latch->ipl_in_action != NULL)
4181 			IPACT_REFRELE(connp->conn_latch->ipl_in_action);
4182 	}
4183 
4184 	/*
4185 	 * We may or may not have policy for this endpoint.  We still set
4186 	 * conn_policy_cached so that inbound datagrams don't have to look
4187 	 * at global policy as policy is considered latched for these
4188 	 * endpoints.  We should not set conn_policy_cached until the conn
4189 	 * reflects the actual policy. If we *set* this before inheriting
4190 	 * the policy there is a window where the check
4191 	 * CONN_INBOUND_POLICY_PRESENT, will neither check with the policy
4192 	 * on the conn (because we have not yet copied the policy on to
4193 	 * conn and hence not set conn_in_enforce_policy) nor with the
4194 	 * global policy (because conn_policy_cached is already set).
4195 	 */
4196 	connp->conn_policy_cached = B_TRUE;
4197 	if (connp->conn_in_enforce_policy)
4198 		connp->conn_flags |= IPCL_CHECK_POLICY;
4199 	return (0);
4200 }
4201 
4202 void
4203 iplatch_free(ipsec_latch_t *ipl)
4204 {
4205 	if (ipl->ipl_out_policy != NULL)
4206 		IPPOL_REFRELE(ipl->ipl_out_policy);
4207 	if (ipl->ipl_in_policy != NULL)
4208 		IPPOL_REFRELE(ipl->ipl_in_policy);
4209 	if (ipl->ipl_in_action != NULL)
4210 		IPACT_REFRELE(ipl->ipl_in_action);
4211 	if (ipl->ipl_out_action != NULL)
4212 		IPACT_REFRELE(ipl->ipl_out_action);
4213 	if (ipl->ipl_local_cid != NULL)
4214 		IPSID_REFRELE(ipl->ipl_local_cid);
4215 	if (ipl->ipl_remote_cid != NULL)
4216 		IPSID_REFRELE(ipl->ipl_remote_cid);
4217 	if (ipl->ipl_local_id != NULL)
4218 		crfree(ipl->ipl_local_id);
4219 	mutex_destroy(&ipl->ipl_lock);
4220 	kmem_free(ipl, sizeof (*ipl));
4221 }
4222 
4223 ipsec_latch_t *
4224 iplatch_create()
4225 {
4226 	ipsec_latch_t *ipl = kmem_alloc(sizeof (*ipl), KM_NOSLEEP);
4227 	if (ipl == NULL)
4228 		return (ipl);
4229 	bzero(ipl, sizeof (*ipl));
4230 	mutex_init(&ipl->ipl_lock, NULL, MUTEX_DEFAULT, NULL);
4231 	ipl->ipl_refcnt = 1;
4232 	return (ipl);
4233 }
4234 
4235 /*
4236  * Identity hash table.
4237  *
4238  * Identities are refcounted and "interned" into the hash table.
4239  * Only references coming from other objects (SA's, latching state)
4240  * are counted in ipsid_refcnt.
4241  *
 * Locking: IPSID_REFHOLD is safe only when either (a) the object's hash
 * bucket is locked, or (b) we know that the refcount must be > 0.
4244  *
4245  * The ipsid_next and ipsid_ptpn fields are only to be referenced or
4246  * modified when the bucket lock is held; in particular, we only
4247  * delete objects while holding the bucket lock, and we only increase
4248  * the refcount from 0 to 1 while the bucket lock is held.
4249  */
4250 
/*
 * Number of buckets in the identity hash table.  ipsid_hash() reduces
 * its result with a mask of (IPSID_HASHSIZE-1), so this must remain a
 * power of two.
 */
#define	IPSID_HASHSIZE 64

/*
 * One bucket of the identity hash table: the head of the bucket's
 * identity list and the lock held while that list is walked or changed.
 */
typedef struct ipsif_s
{
	ipsid_t *ipsif_head;	/* first identity in this bucket, or NULL */
	kmutex_t ipsif_lock;	/* bucket lock */
} ipsif_t;

/* The identity hash table itself, indexed by ipsid_hash(). */
ipsif_t ipsid_buckets[IPSID_HASHSIZE];
4260 
4261 /*
4262  * Hash function for ID hash table.
4263  */
4264 static uint32_t
4265 ipsid_hash(int idtype, char *idstring)
4266 {
4267 	uint32_t hval = idtype;
4268 	unsigned char c;
4269 
4270 	while ((c = *idstring++) != 0) {
4271 		hval = (hval << 4) | (hval >> 28);
4272 		hval ^= c;
4273 	}
4274 	hval = hval ^ (hval >> 16);
4275 	return (hval & (IPSID_HASHSIZE-1));
4276 }
4277 
4278 /*
4279  * Look up identity string in hash table.  Return identity object
4280  * corresponding to the name -- either preexisting, or newly allocated.
4281  *
4282  * Return NULL if we need to allocate a new one and can't get memory.
4283  */
4284 ipsid_t *
4285 ipsid_lookup(int idtype, char *idstring)
4286 {
4287 	ipsid_t *retval;
4288 	char *nstr;
4289 	int idlen = strlen(idstring) + 1;
4290 
4291 	ipsif_t *bucket = &ipsid_buckets[ipsid_hash(idtype, idstring)];
4292 
4293 	mutex_enter(&bucket->ipsif_lock);
4294 
4295 	for (retval = bucket->ipsif_head; retval != NULL;
4296 	    retval = retval->ipsid_next) {
4297 		if (idtype != retval->ipsid_type)
4298 			continue;
4299 		if (bcmp(idstring, retval->ipsid_cid, idlen) != 0)
4300 			continue;
4301 
4302 		IPSID_REFHOLD(retval);
4303 		mutex_exit(&bucket->ipsif_lock);
4304 		return (retval);
4305 	}
4306 
4307 	retval = kmem_alloc(sizeof (*retval), KM_NOSLEEP);
4308 	if (!retval) {
4309 		mutex_exit(&bucket->ipsif_lock);
4310 		return (NULL);
4311 	}
4312 
4313 	nstr = kmem_alloc(idlen, KM_NOSLEEP);
4314 	if (!nstr) {
4315 		mutex_exit(&bucket->ipsif_lock);
4316 		kmem_free(retval, sizeof (*retval));
4317 		return (NULL);
4318 	}
4319 
4320 	retval->ipsid_refcnt = 1;
4321 	retval->ipsid_next = bucket->ipsif_head;
4322 	if (retval->ipsid_next != NULL)
4323 		retval->ipsid_next->ipsid_ptpn = &retval->ipsid_next;
4324 	retval->ipsid_ptpn = &bucket->ipsif_head;
4325 	retval->ipsid_type = idtype;
4326 	retval->ipsid_cid = nstr;
4327 	bucket->ipsif_head = retval;
4328 	bcopy(idstring, nstr, idlen);
4329 	mutex_exit(&bucket->ipsif_lock);
4330 
4331 	return (retval);
4332 }
4333 
4334 /*
4335  * Garbage collect the identity hash table.
4336  */
4337 void
4338 ipsid_gc()
4339 {
4340 	int i, len;
4341 	ipsid_t *id, *nid;
4342 	ipsif_t *bucket;
4343 
4344 	for (i = 0; i < IPSID_HASHSIZE; i++) {
4345 		bucket = &ipsid_buckets[i];
4346 		mutex_enter(&bucket->ipsif_lock);
4347 		for (id = bucket->ipsif_head; id != NULL; id = nid) {
4348 			nid = id->ipsid_next;
4349 			if (id->ipsid_refcnt == 0) {
4350 				*id->ipsid_ptpn = nid;
4351 				if (nid != NULL)
4352 					nid->ipsid_ptpn = id->ipsid_ptpn;
4353 				len = strlen(id->ipsid_cid) + 1;
4354 				kmem_free(id->ipsid_cid, len);
4355 				kmem_free(id, sizeof (*id));
4356 			}
4357 		}
4358 		mutex_exit(&bucket->ipsif_lock);
4359 	}
4360 }
4361 
4362 /*
4363  * Return true if two identities are the same.
4364  */
4365 boolean_t
4366 ipsid_equal(ipsid_t *id1, ipsid_t *id2)
4367 {
4368 	if (id1 == id2)
4369 		return (B_TRUE);
4370 #ifdef DEBUG
4371 	if ((id1 == NULL) || (id2 == NULL))
4372 		return (B_FALSE);
4373 	/*
4374 	 * test that we're interning id's correctly..
4375 	 */
4376 	ASSERT((strcmp(id1->ipsid_cid, id2->ipsid_cid) != 0) ||
4377 	    (id1->ipsid_type != id2->ipsid_type));
4378 #endif
4379 	return (B_FALSE);
4380 }
4381 
4382 /*
4383  * Initialize identity table; called during module initialization.
4384  */
4385 static void
4386 ipsid_init()
4387 {
4388 	ipsif_t *bucket;
4389 	int i;
4390 
4391 	for (i = 0; i < IPSID_HASHSIZE; i++) {
4392 		bucket = &ipsid_buckets[i];
4393 		mutex_init(&bucket->ipsif_lock, NULL, MUTEX_DEFAULT, NULL);
4394 	}
4395 }
4396 
4397 /*
4398  * Free identity table (preparatory to module unload)
4399  */
4400 static void
4401 ipsid_fini()
4402 {
4403 	ipsif_t *bucket;
4404 	int i;
4405 
4406 	for (i = 0; i < IPSID_HASHSIZE; i++) {
4407 		bucket = &ipsid_buckets[i];
4408 		mutex_destroy(&bucket->ipsif_lock);
4409 	}
4410 }
4411 
4412 /*
4413  * Update the minimum and maximum supported key sizes for the
4414  * specified algorithm. Must be called while holding the algorithms lock.
4415  */
4416 void
4417 ipsec_alg_fix_min_max(ipsec_alginfo_t *alg, ipsec_algtype_t alg_type)
4418 {
4419 	size_t crypto_min = (size_t)-1, crypto_max = 0;
4420 	size_t cur_crypto_min, cur_crypto_max;
4421 	boolean_t is_valid;
4422 	crypto_mechanism_info_t *mech_infos;
4423 	uint_t nmech_infos;
4424 	int crypto_rc, i;
4425 	crypto_mech_usage_t mask;
4426 
4427 	ASSERT(MUTEX_HELD(&alg_lock));
4428 
4429 	/*
4430 	 * Compute the min, max, and default key sizes (in number of
4431 	 * increments to the default key size in bits) as defined
4432 	 * by the algorithm mappings. This range of key sizes is used
4433 	 * for policy related operations. The effective key sizes
4434 	 * supported by the framework could be more limited than
4435 	 * those defined for an algorithm.
4436 	 */
4437 	alg->alg_default_bits = alg->alg_key_sizes[0];
4438 	if (alg->alg_increment != 0) {
4439 		/* key sizes are defined by range & increment */
4440 		alg->alg_minbits = alg->alg_key_sizes[1];
4441 		alg->alg_maxbits = alg->alg_key_sizes[2];
4442 
4443 		alg->alg_default = SADB_ALG_DEFAULT_INCR(alg->alg_minbits,
4444 		    alg->alg_increment, alg->alg_default_bits);
4445 	} else if (alg->alg_nkey_sizes == 0) {
4446 		/* no specified key size for algorithm */
4447 		alg->alg_minbits = alg->alg_maxbits = 0;
4448 	} else {
4449 		/* key sizes are defined by enumeration */
4450 		alg->alg_minbits = (uint16_t)-1;
4451 		alg->alg_maxbits = 0;
4452 
4453 		for (i = 0; i < alg->alg_nkey_sizes; i++) {
4454 			if (alg->alg_key_sizes[i] < alg->alg_minbits)
4455 				alg->alg_minbits = alg->alg_key_sizes[i];
4456 			if (alg->alg_key_sizes[i] > alg->alg_maxbits)
4457 				alg->alg_maxbits = alg->alg_key_sizes[i];
4458 		}
4459 		alg->alg_default = 0;
4460 	}
4461 
4462 	if (!(alg->alg_flags & ALG_FLAG_VALID))
4463 		return;
4464 
4465 	/*
4466 	 * Mechanisms do not apply to the NULL encryption
4467 	 * algorithm, so simply return for this case.
4468 	 */
4469 	if (alg->alg_id == SADB_EALG_NULL)
4470 		return;
4471 
4472 	/*
4473 	 * Find the min and max key sizes supported by the cryptographic
4474 	 * framework providers.
4475 	 */
4476 
4477 	/* get the key sizes supported by the framework */
4478 	crypto_rc = crypto_get_all_mech_info(alg->alg_mech_type,
4479 	    &mech_infos, &nmech_infos, KM_SLEEP);
4480 	if (crypto_rc != CRYPTO_SUCCESS || nmech_infos == 0) {
4481 		alg->alg_flags &= ~ALG_FLAG_VALID;
4482 		return;
4483 	}
4484 
4485 	/* min and max key sizes supported by framework */
4486 	for (i = 0, is_valid = B_FALSE; i < nmech_infos; i++) {
4487 		int unit_bits;
4488 
4489 		/*
4490 		 * Ignore entries that do not support the operations
4491 		 * needed for the algorithm type.
4492 		 */
4493 		if (alg_type == IPSEC_ALG_AUTH)
4494 			mask = CRYPTO_MECH_USAGE_MAC;
4495 		else
4496 			mask = CRYPTO_MECH_USAGE_ENCRYPT |
4497 				CRYPTO_MECH_USAGE_DECRYPT;
4498 		if ((mech_infos[i].mi_usage & mask) != mask)
4499 			continue;
4500 
4501 		unit_bits = (mech_infos[i].mi_keysize_unit ==
4502 		    CRYPTO_KEYSIZE_UNIT_IN_BYTES)  ? 8 : 1;
4503 		/* adjust min/max supported by framework */
4504 		cur_crypto_min = mech_infos[i].mi_min_key_size * unit_bits;
4505 		cur_crypto_max = mech_infos[i].mi_max_key_size * unit_bits;
4506 
4507 		if (cur_crypto_min < crypto_min)
4508 			crypto_min = cur_crypto_min;
4509 
4510 		/*
4511 		 * CRYPTO_EFFECTIVELY_INFINITE is a special value of
4512 		 * the crypto framework which means "no upper limit".
4513 		 */
4514 		if (mech_infos[i].mi_max_key_size ==
4515 		    CRYPTO_EFFECTIVELY_INFINITE)
4516 			crypto_max = (size_t)-1;
4517 		else if (cur_crypto_max > crypto_max)
4518 			crypto_max = cur_crypto_max;
4519 
4520 		is_valid = B_TRUE;
4521 	}
4522 
4523 	kmem_free(mech_infos, sizeof (crypto_mechanism_info_t) *
4524 	    nmech_infos);
4525 
4526 	if (!is_valid) {
4527 		/* no key sizes supported by framework */
4528 		alg->alg_flags &= ~ALG_FLAG_VALID;
4529 		return;
4530 	}
4531 
4532 	/*
4533 	 * Determine min and max key sizes from alg_key_sizes[].
4534 	 * defined for the algorithm entry. Adjust key sizes based on
4535 	 * those supported by the framework.
4536 	 */
4537 	alg->alg_ef_default_bits = alg->alg_key_sizes[0];
4538 	if (alg->alg_increment != 0) {
4539 		/* supported key sizes are defined by range  & increment */
4540 		crypto_min = ALGBITS_ROUND_UP(crypto_min, alg->alg_increment);
4541 		crypto_max = ALGBITS_ROUND_DOWN(crypto_max, alg->alg_increment);
4542 
4543 		alg->alg_ef_minbits = MAX(alg->alg_minbits,
4544 		    (uint16_t)crypto_min);
4545 		alg->alg_ef_maxbits = MIN(alg->alg_maxbits,
4546 		    (uint16_t)crypto_max);
4547 
4548 		/*
4549 		 * If the sizes supported by the framework are outside
4550 		 * the range of sizes defined by the algorithm mappings,
4551 		 * the algorithm cannot be used. Check for this
4552 		 * condition here.
4553 		 */
4554 		if (alg->alg_ef_minbits > alg->alg_ef_maxbits) {
4555 			alg->alg_flags &= ~ALG_FLAG_VALID;
4556 			return;
4557 		}
4558 
4559 		if (alg->alg_ef_default_bits < alg->alg_ef_minbits)
4560 		    alg->alg_ef_default_bits = alg->alg_ef_minbits;
4561 		if (alg->alg_ef_default_bits > alg->alg_ef_maxbits)
4562 		    alg->alg_ef_default_bits = alg->alg_ef_maxbits;
4563 
4564 		alg->alg_ef_default = SADB_ALG_DEFAULT_INCR(alg->alg_ef_minbits,
4565 		    alg->alg_increment, alg->alg_ef_default_bits);
4566 	} else if (alg->alg_nkey_sizes == 0) {
4567 		/* no specified key size for algorithm */
4568 		alg->alg_ef_minbits = alg->alg_ef_maxbits = 0;
4569 	} else {
4570 		/* supported key sizes are defined by enumeration */
4571 		alg->alg_ef_minbits = (uint16_t)-1;
4572 		alg->alg_ef_maxbits = 0;
4573 
4574 		for (i = 0, is_valid = B_FALSE; i < alg->alg_nkey_sizes; i++) {
4575 			/*
4576 			 * Ignore the current key size if it is not in the
4577 			 * range of sizes supported by the framework.
4578 			 */
4579 			if (alg->alg_key_sizes[i] < crypto_min ||
4580 			    alg->alg_key_sizes[i] > crypto_max)
4581 				continue;
4582 			if (alg->alg_key_sizes[i] < alg->alg_ef_minbits)
4583 				alg->alg_ef_minbits = alg->alg_key_sizes[i];
4584 			if (alg->alg_key_sizes[i] > alg->alg_ef_maxbits)
4585 				alg->alg_ef_maxbits = alg->alg_key_sizes[i];
4586 			is_valid = B_TRUE;
4587 		}
4588 
4589 		if (!is_valid) {
4590 			alg->alg_flags &= ~ALG_FLAG_VALID;
4591 			return;
4592 		}
4593 		alg->alg_ef_default = 0;
4594 	}
4595 }
4596 
4597 /*
4598  * Free the memory used by the specified algorithm.
4599  */
4600 void
4601 ipsec_alg_free(ipsec_alginfo_t *alg)
4602 {
4603 	if (alg == NULL)
4604 		return;
4605 
4606 	if (alg->alg_key_sizes != NULL)
4607 		kmem_free(alg->alg_key_sizes,
4608 		    (alg->alg_nkey_sizes + 1) * sizeof (uint16_t));
4609 
4610 	if (alg->alg_block_sizes != NULL)
4611 		kmem_free(alg->alg_block_sizes,
4612 		    (alg->alg_nblock_sizes + 1) * sizeof (uint16_t));
4613 
4614 	kmem_free(alg, sizeof (*alg));
4615 }
4616 
4617 /*
4618  * Check the validity of the specified key size for an algorithm.
4619  * Returns B_TRUE if key size is valid, B_FALSE otherwise.
4620  */
4621 boolean_t
4622 ipsec_valid_key_size(uint16_t key_size, ipsec_alginfo_t *alg)
4623 {
4624 	if (key_size < alg->alg_ef_minbits || key_size > alg->alg_ef_maxbits)
4625 		return (B_FALSE);
4626 
4627 	if (alg->alg_increment == 0 && alg->alg_nkey_sizes != 0) {
4628 		/*
4629 		 * If the key sizes are defined by enumeration, the new
4630 		 * key size must be equal to one of the supported values.
4631 		 */
4632 		int i;
4633 
4634 		for (i = 0; i < alg->alg_nkey_sizes; i++)
4635 			if (key_size == alg->alg_key_sizes[i])
4636 				break;
4637 		if (i == alg->alg_nkey_sizes)
4638 			return (B_FALSE);
4639 	}
4640 
4641 	return (B_TRUE);
4642 }
4643 
4644 /*
4645  * Callback function invoked by the crypto framework when a provider
4646  * registers or unregisters. This callback updates the algorithms
4647  * tables when a crypto algorithm is no longer available or becomes
4648  * available, and triggers the freeing/creation of context templates
4649  * associated with existing SAs, if needed.
4650  */
4651 void
4652 ipsec_prov_update_callback(uint32_t event, void *event_arg)
4653 {
4654 	crypto_notify_event_change_t *prov_change =
4655 	    (crypto_notify_event_change_t *)event_arg;
4656 	uint_t algidx, algid, algtype, mech_count, mech_idx;
4657 	ipsec_alginfo_t *alg;
4658 	ipsec_alginfo_t oalg;
4659 	crypto_mech_name_t *mechs;
4660 	boolean_t alg_changed = B_FALSE;
4661 
4662 	/* ignore events for which we didn't register */
4663 	if (event != CRYPTO_EVENT_PROVIDERS_CHANGE) {
4664 		ip1dbg(("ipsec_prov_update_callback: unexpected event 0x%x "
4665 			" received from crypto framework\n", event));
4666 		return;
4667 	}
4668 
4669 	mechs = crypto_get_mech_list(&mech_count, KM_SLEEP);
4670 	if (mechs == NULL)
4671 		return;
4672 
4673 	/*
4674 	 * Walk the list of currently defined IPsec algorithm. Update
4675 	 * the algorithm valid flag and trigger an update of the
4676 	 * SAs that depend on that algorithm.
4677 	 */
4678 	mutex_enter(&alg_lock);
4679 	for (algtype = 0; algtype < IPSEC_NALGTYPES; algtype++) {
4680 		for (algidx = 0; algidx < ipsec_nalgs[algtype]; algidx++) {
4681 
4682 			algid = ipsec_sortlist[algtype][algidx];
4683 			alg = ipsec_alglists[algtype][algid];
4684 			ASSERT(alg != NULL);
4685 
4686 			/*
4687 			 * Skip the algorithms which do not map to the
4688 			 * crypto framework provider being added or removed.
4689 			 */
4690 			if (strncmp(alg->alg_mech_name,
4691 			    prov_change->ec_mech_name,
4692 			    CRYPTO_MAX_MECH_NAME) != 0)
4693 				continue;
4694 
4695 			/*
4696 			 * Determine if the mechanism is valid. If it
4697 			 * is not, mark the algorithm as being invalid. If
4698 			 * it is, mark the algorithm as being valid.
4699 			 */
4700 			for (mech_idx = 0; mech_idx < mech_count; mech_idx++)
4701 				if (strncmp(alg->alg_mech_name,
4702 				    mechs[mech_idx], CRYPTO_MAX_MECH_NAME) == 0)
4703 					break;
4704 			if (mech_idx == mech_count &&
4705 			    alg->alg_flags & ALG_FLAG_VALID) {
4706 				alg->alg_flags &= ~ALG_FLAG_VALID;
4707 				alg_changed = B_TRUE;
4708 			} else if (mech_idx < mech_count &&
4709 			    !(alg->alg_flags & ALG_FLAG_VALID)) {
4710 				alg->alg_flags |= ALG_FLAG_VALID;
4711 				alg_changed = B_TRUE;
4712 			}
4713 
4714 			/*
4715 			 * Update the supported key sizes, regardless
4716 			 * of whether a crypto provider was added or
4717 			 * removed.
4718 			 */
4719 			oalg = *alg;
4720 			ipsec_alg_fix_min_max(alg, algtype);
4721 			if (!alg_changed &&
4722 			    alg->alg_ef_minbits != oalg.alg_ef_minbits ||
4723 			    alg->alg_ef_maxbits != oalg.alg_ef_maxbits ||
4724 			    alg->alg_ef_default != oalg.alg_ef_default ||
4725 			    alg->alg_ef_default_bits !=
4726 			    oalg.alg_ef_default_bits)
4727 				alg_changed = B_TRUE;
4728 
4729 			/*
4730 			 * Update the affected SAs if a software provider is
4731 			 * being added or removed.
4732 			 */
4733 			if (prov_change->ec_provider_type ==
4734 			    CRYPTO_SW_PROVIDER)
4735 				sadb_alg_update(algtype, alg->alg_id,
4736 				    prov_change->ec_change ==
4737 				    CRYPTO_EVENT_CHANGE_ADDED);
4738 		}
4739 	}
4740 	mutex_exit(&alg_lock);
4741 	crypto_free_mech_list(mechs, mech_count);
4742 
4743 	if (alg_changed) {
4744 		/*
4745 		 * An algorithm has changed, i.e. it became valid or
4746 		 * invalid, or its support key sizes have changed.
4747 		 * Notify ipsecah and ipsecesp of this change so
4748 		 * that they can send a SADB_REGISTER to their consumers.
4749 		 */
4750 		ipsecah_algs_changed();
4751 		ipsecesp_algs_changed();
4752 	}
4753 }
4754 
4755 /*
4756  * Registers with the crypto framework to be notified of crypto
4757  * providers changes. Used to update the algorithm tables and
4758  * to free or create context templates if needed. Invoked after IPsec
4759  * is loaded successfully.
4760  */
4761 void
4762 ipsec_register_prov_update(void)
4763 {
4764 	prov_update_handle = crypto_notify_events(
4765 	    ipsec_prov_update_callback, CRYPTO_EVENT_PROVIDERS_CHANGE);
4766 }
4767 
4768 /*
4769  * Unregisters from the framework to be notified of crypto providers
4770  * changes. Called from ipsec_policy_destroy().
4771  */
4772 static void
4773 ipsec_unregister_prov_update(void)
4774 {
4775 	if (prov_update_handle != NULL)
4776 		crypto_unnotify_events(prov_update_handle);
4777 }
4778