xref: /titanic_52/usr/src/uts/common/inet/ip/sadb.c (revision b6c3f7863936abeae522e48a13887dddeb691a45)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/stream.h>
30 #include <sys/stropts.h>
31 #include <sys/ddi.h>
32 #include <sys/debug.h>
33 #include <sys/cmn_err.h>
34 #include <sys/stream.h>
35 #include <sys/strlog.h>
36 #include <sys/kmem.h>
37 #include <sys/sunddi.h>
38 #include <sys/tihdr.h>
39 #include <sys/atomic.h>
40 #include <sys/socket.h>
41 #include <sys/sysmacros.h>
42 #include <sys/crypto/common.h>
43 #include <sys/crypto/api.h>
44 #include <sys/zone.h>
45 #include <netinet/in.h>
46 #include <net/if.h>
47 #include <net/pfkeyv2.h>
48 #include <inet/common.h>
49 #include <netinet/ip6.h>
50 #include <inet/ip.h>
51 #include <inet/ip_ire.h>
52 #include <inet/ip6.h>
53 #include <inet/ipsec_info.h>
54 #include <inet/tcp.h>
55 #include <inet/sadb.h>
56 #include <inet/ipsec_impl.h>
57 #include <inet/ipsecah.h>
58 #include <inet/ipsecesp.h>
59 #include <sys/random.h>
60 #include <sys/dlpi.h>
61 #include <sys/iphada.h>
62 #include <inet/ip_if.h>
63 #include <inet/ipdrop.h>
64 #include <inet/ipclassifier.h>
65 #include <inet/sctp_ip.h>
66 #include <inet/tun.h>
67 
68 /*
69  * This source file contains Security Association Database (SADB) common
70  * routines.  They are linked in with the AH module.  Since AH has no chance
71  * of falling under export control, it was safe to link it in there.
72  */
73 
74 static mblk_t *sadb_extended_acquire(ipsec_selector_t *, ipsec_policy_t *,
75     ipsec_action_t *, boolean_t, uint32_t, uint32_t, netstack_t *);
76 static void sadb_ill_df(ill_t *, mblk_t *, isaf_t *, int, boolean_t);
77 static ipsa_t *sadb_torch_assoc(isaf_t *, ipsa_t *, boolean_t, mblk_t **);
78 static void sadb_drain_torchq(queue_t *, mblk_t *);
79 static void sadb_destroy_acqlist(iacqf_t **, uint_t, boolean_t,
80 			    netstack_t *);
81 static void sadb_destroy(sadb_t *, netstack_t *);
82 static mblk_t *sadb_sa2msg(ipsa_t *, sadb_msg_t *);
83 
84 static time_t sadb_add_time(time_t, uint64_t);
85 
86 /*
87  * ipsacq_maxpackets is defined here to make it tunable
88  * from /etc/system.
89  */
90 extern uint64_t ipsacq_maxpackets;
91 
/*
 * SET_EXPIRE(sa, delta, exp): if the SA's ipsa_<delta> lifetime is non-zero,
 * set ipsa_<exp> to ipsa_addtime plus that lifetime, computed with
 * sadb_add_time().  A zero lifetime leaves ipsa_<exp> untouched.
 *
 * Note that this expands to a bare brace-enclosed block and that "sa" is
 * evaluated more than once.
 */
#define	SET_EXPIRE(sa, delta, exp) {				\
	if (((sa)->ipsa_ ## delta) != 0) {				\
		(sa)->ipsa_ ## exp = sadb_add_time((sa)->ipsa_addtime,	\
			(sa)->ipsa_ ## delta);				\
	}								\
}
98 
/*
 * UPDATE_EXPIRE(sa, delta, exp): if the SA's ipsa_<delta> lifetime is
 * non-zero, compute ipsa_usetime plus that lifetime with sadb_add_time().
 * If ipsa_<exp> is currently zero it takes the new value; otherwise it is
 * lowered to the new value only when the new value is smaller (MIN).
 *
 * Note that this expands to a bare brace-enclosed block which declares a
 * local named "tmp", and that "sa" is evaluated more than once.
 */
#define	UPDATE_EXPIRE(sa, delta, exp) {					\
	if (((sa)->ipsa_ ## delta) != 0) {				\
		time_t tmp = sadb_add_time((sa)->ipsa_usetime,		\
			(sa)->ipsa_ ## delta);				\
		if (((sa)->ipsa_ ## exp) == 0)				\
			(sa)->ipsa_ ## exp = tmp;			\
		else							\
			(sa)->ipsa_ ## exp = 				\
			    MIN((sa)->ipsa_ ## exp, tmp); 		\
	}								\
}
110 
111 
112 /* wrap the macro so we can pass it as a function pointer */
113 void
114 sadb_sa_refrele(void *target)
115 {
116 	IPSA_REFRELE(((ipsa_t *)target));
117 }
118 
/*
 * We presume that sizeof (long) == sizeof (time_t) and that time_t is
 * a signed type.
 */
#define	TIME_MAX LONG_MAX

/*
 * PF_KEY gives us lifetimes in uint64_t seconds.  We presume that
 * time_t is defined to be a signed type with the same range as
 * "long".  On ILP32 systems, we thus run the risk of wrapping around
 * at end of time, as well as "overwrapping" the clock back around
 * into a seemingly valid but incorrect future date earlier than the
 * desired expiration.
 *
 * In order to avoid odd behavior (either negative lifetimes or loss
 * of high order bits) when someone asks for bizarrely long SA
 * lifetimes, we do a saturating add for expire times.
 *
 * We presume that ILP32 systems will be past end of support life when
 * the 32-bit time_t overflows (a dangerous assumption, mind you..).
 *
 * On LP64, 2^64 seconds are about 5.8e11 years, at which point we
 * will hopefully have figured out clever ways to avoid the use of
 * fixed-sized integers in computation.
 *
 * Returns base + delta, or TIME_MAX if that sum cannot be represented
 * in a time_t.
 */
static time_t
sadb_add_time(time_t base, uint64_t delta)
{
	/*
	 * Clip delta to the maximum possible time_t value to
	 * prevent "overwrapping" back into a shorter-than-desired
	 * future time.  After this, delta always fits in a time_t.
	 */
	if (delta > (uint64_t)TIME_MAX)
		delta = (uint64_t)TIME_MAX;

	/*
	 * Check the remaining headroom before adding, rather than adding
	 * first and looking for wraparound afterwards: the latter depends
	 * on how an out-of-range unsigned value converts back to a signed
	 * time_t.  Only a positive base can push the sum past TIME_MAX;
	 * for base <= 0 the clipped delta always fits.
	 */
	if (base > 0 && delta > (uint64_t)(TIME_MAX - base))
		return (TIME_MAX);

	return (base + (time_t)delta);
}
169 
/*
 * Callers of this function have already created a working security
 * association, and have found the appropriate table & hash chain.  All this
 * function does is check duplicates, and insert the SA.  The caller needs to
 * hold the hash bucket lock and increment the refcnt before insertion.
 *
 * Return 0 if success, EEXIST if collision.  A collision is an SA already in
 * the bucket with the same destination address and SPI as the new one.
 */

/*
 * Two SAs "match" when their unique IDs, each masked with its own unique
 * mask, compare equal.  Only used by sadb_insertassoc(); #undef'd below.
 */
#define	SA_UNIQUE_MATCH(sa1, sa2) \
	(((sa1)->ipsa_unique_id & (sa1)->ipsa_unique_mask) == \
	((sa2)->ipsa_unique_id & (sa2)->ipsa_unique_mask))

int
sadb_insertassoc(ipsa_t *ipsa, isaf_t *bucket)
{
	ipsa_t **ptpn = NULL;	/* insertion point; NULL means "head" */
	ipsa_t *walker;
	boolean_t unspecsrc;	/* new SA has an unspecified source address */

	ASSERT(MUTEX_HELD(&bucket->isaf_lock));

	unspecsrc = IPSA_IS_ADDR_UNSPEC(ipsa->ipsa_srcaddr, ipsa->ipsa_addrfam);

	walker = bucket->isaf_ipsa;
	ASSERT(walker == NULL || ipsa->ipsa_addrfam == walker->ipsa_addrfam);

	/*
	 * Find insertion point (pointed to with **ptpn).  Insert at the head
	 * of the list unless there's an unspecified source address, then
	 * insert it after the last SA with a specified source address.
	 *
	 * BTW, you'll have to walk the whole chain, matching on {DST, SPI}
	 * checking for collisions.
	 */

	while (walker != NULL) {
		if (IPSA_ARE_ADDR_EQUAL(walker->ipsa_dstaddr,
		    ipsa->ipsa_dstaddr, ipsa->ipsa_addrfam)) {
			/* Same destination and SPI: refuse the insert. */
			if (walker->ipsa_spi == ipsa->ipsa_spi)
				return (EEXIST);

			/*
			 * Same destination, different SPI.  If the new SA is
			 * mature and this existing SA has been used and has
			 * the same masked unique ID, flag the existing SA
			 * with IPSA_F_CINVALID.  Note that this may already
			 * have happened to earlier entries by the time a
			 * later entry causes the EEXIST return above.
			 */
			mutex_enter(&walker->ipsa_lock);
			if (ipsa->ipsa_state == IPSA_STATE_MATURE &&
			    (walker->ipsa_flags & IPSA_F_USED) &&
			    SA_UNIQUE_MATCH(walker, ipsa)) {
				walker->ipsa_flags |= IPSA_F_CINVALID;
			}
			mutex_exit(&walker->ipsa_lock);
		}

		/*
		 * For an unspecified-source SA, remember the first suitable
		 * slot: just before the first existing unspecified-source
		 * entry, or the tail of the chain if there is none.
		 */
		if (ptpn == NULL && unspecsrc) {
			if (IPSA_IS_ADDR_UNSPEC(walker->ipsa_srcaddr,
			    walker->ipsa_addrfam))
				ptpn = walker->ipsa_ptpn;
			else if (walker->ipsa_next == NULL)
				ptpn = &walker->ipsa_next;
		}

		walker = walker->ipsa_next;
	}

	/* No slot chosen (specified source, or empty chain): use the head. */
	if (ptpn == NULL)
		ptpn = &bucket->isaf_ipsa;

	/* Splice the new SA in at *ptpn and fix up the successor's back link. */
	ipsa->ipsa_next = *ptpn;
	ipsa->ipsa_ptpn = ptpn;
	if (ipsa->ipsa_next != NULL)
		ipsa->ipsa_next->ipsa_ptpn = &ipsa->ipsa_next;
	*ptpn = ipsa;
	/* Record which lock protects this SA's linkage. */
	ipsa->ipsa_linklock = &bucket->isaf_lock;

	return (0);
}
#undef SA_UNIQUE_MATCH
243 
/*
 * Free a security association.  Its reference count is 0, which means
 * I must free it.  The SA must be unlocked and must not be linked into
 * any fanout list.
 *
 * This is installed as the SA's ipsa_freefunc by sadb_makelarvalassoc().
 */
static void
sadb_freeassoc(ipsa_t *ipsa)
{
	ipsec_stack_t	*ipss = ipsa->ipsa_netstack->netstack_ipsec;

	ASSERT(ipss != NULL);
	ASSERT(!MUTEX_HELD(&ipsa->ipsa_lock));
	ASSERT(ipsa->ipsa_refcnt == 0);
	ASSERT(ipsa->ipsa_next == NULL);
	ASSERT(ipsa->ipsa_ptpn == NULL);

	/*
	 * Hand whatever sadb_clear_lpkt() returns for this SA to
	 * ip_drop_packet(), charged to the ipds_sadb_inlarval_timeout
	 * dropper.
	 */
	ip_drop_packet(sadb_clear_lpkt(ipsa), B_TRUE, NULL, NULL,
	    DROPPER(ipss, ipds_sadb_inlarval_timeout),
	    &ipss->ipsec_sadb_dropper);

	/* The context templates are torn down with the SA lock held. */
	mutex_enter(&ipsa->ipsa_lock);
	ipsec_destroy_ctx_tmpl(ipsa, IPSEC_ALG_AUTH);
	ipsec_destroy_ctx_tmpl(ipsa, IPSEC_ALG_ENCR);
	mutex_exit(&ipsa->ipsa_lock);

	/* bzero() these fields for paranoia's sake. */
	if (ipsa->ipsa_authkey != NULL) {
		bzero(ipsa->ipsa_authkey, ipsa->ipsa_authkeylen);
		kmem_free(ipsa->ipsa_authkey, ipsa->ipsa_authkeylen);
	}
	if (ipsa->ipsa_encrkey != NULL) {
		bzero(ipsa->ipsa_encrkey, ipsa->ipsa_encrkeylen);
		kmem_free(ipsa->ipsa_encrkey, ipsa->ipsa_encrkeylen);
	}
	/* Drop this SA's references on its source/destination identities. */
	if (ipsa->ipsa_src_cid != NULL) {
		IPSID_REFRELE(ipsa->ipsa_src_cid);
	}
	if (ipsa->ipsa_dst_cid != NULL) {
		IPSID_REFRELE(ipsa->ipsa_dst_cid);
	}
	/* Unlike the keys, these two buffers are freed without zeroing. */
	if (ipsa->ipsa_integ != NULL)
		kmem_free(ipsa->ipsa_integ, ipsa->ipsa_integlen);
	if (ipsa->ipsa_sens != NULL)
		kmem_free(ipsa->ipsa_sens, ipsa->ipsa_senslen);

	mutex_destroy(&ipsa->ipsa_lock);
	kmem_free(ipsa, sizeof (*ipsa));
}
292 
/*
 * Unlink a security association from a hash bucket.  Assume the hash bucket
 * lock is held, but the association's lock is not.
 *
 * Note that we do not bump the bucket's generation number here because
 * we might not be making a visible change to the set of visible SA's.
 * All callers MUST bump the bucket's generation number before they unlock
 * the bucket if they use sadb_unlinkassoc to permanently remove an SA which
 * was present in the bucket at the time it was locked.
 *
 * The reference released at the end may be the last one, in which case the
 * SA is destroyed; callers must not touch "ipsa" afterwards unless they hold
 * a reference of their own.
 */
void
sadb_unlinkassoc(ipsa_t *ipsa)
{
	ASSERT(ipsa->ipsa_linklock != NULL);
	ASSERT(MUTEX_HELD(ipsa->ipsa_linklock));

	/* These fields are protected by the link lock. */
	/* Make whatever pointed at this SA point at its successor instead. */
	*(ipsa->ipsa_ptpn) = ipsa->ipsa_next;
	if (ipsa->ipsa_next != NULL) {
		/* The successor inherits this SA's back pointer. */
		ipsa->ipsa_next->ipsa_ptpn = ipsa->ipsa_ptpn;
		ipsa->ipsa_next = NULL;
	}

	ipsa->ipsa_ptpn = NULL;

	/* This may destroy the SA. */
	IPSA_REFRELE(ipsa);
}
321 
322 /*
323  * Create a larval security association with the specified SPI.	 All other
324  * fields are zeroed.
325  */
326 static ipsa_t *
327 sadb_makelarvalassoc(uint32_t spi, uint32_t *src, uint32_t *dst, int addrfam,
328     netstack_t *ns)
329 {
330 	ipsa_t *newbie;
331 
332 	/*
333 	 * Allocate...
334 	 */
335 
336 	newbie = (ipsa_t *)kmem_zalloc(sizeof (ipsa_t), KM_NOSLEEP);
337 	if (newbie == NULL) {
338 		/* Can't make new larval SA. */
339 		return (NULL);
340 	}
341 
342 	/* Assigned requested SPI, assume caller does SPI allocation magic. */
343 	newbie->ipsa_spi = spi;
344 	newbie->ipsa_netstack = ns;	/* No netstack_hold */
345 
346 	/*
347 	 * Copy addresses...
348 	 */
349 
350 	IPSA_COPY_ADDR(newbie->ipsa_srcaddr, src, addrfam);
351 	IPSA_COPY_ADDR(newbie->ipsa_dstaddr, dst, addrfam);
352 
353 	newbie->ipsa_addrfam = addrfam;
354 
355 	/*
356 	 * Set common initialization values, including refcnt.
357 	 */
358 	mutex_init(&newbie->ipsa_lock, NULL, MUTEX_DEFAULT, NULL);
359 	newbie->ipsa_state = IPSA_STATE_LARVAL;
360 	newbie->ipsa_refcnt = 1;
361 	newbie->ipsa_freefunc = sadb_freeassoc;
362 
363 	/*
364 	 * There aren't a lot of other common initialization values, as
365 	 * they are copied in from the PF_KEY message.
366 	 */
367 
368 	return (newbie);
369 }
370 
371 /*
372  * Call me to initialize a security association fanout.
373  */
374 static int
375 sadb_init_fanout(isaf_t **tablep, uint_t size, int kmflag)
376 {
377 	isaf_t *table;
378 	int i;
379 
380 	table = (isaf_t *)kmem_alloc(size * sizeof (*table), kmflag);
381 	*tablep = table;
382 
383 	if (table == NULL)
384 		return (ENOMEM);
385 
386 	for (i = 0; i < size; i++) {
387 		mutex_init(&(table[i].isaf_lock), NULL, MUTEX_DEFAULT, NULL);
388 		table[i].isaf_ipsa = NULL;
389 		table[i].isaf_gen = 0;
390 	}
391 
392 	return (0);
393 }
394 
395 /*
396  * Call me to initialize an acquire fanout
397  */
398 static int
399 sadb_init_acfanout(iacqf_t **tablep, uint_t size, int kmflag)
400 {
401 	iacqf_t *table;
402 	int i;
403 
404 	table = (iacqf_t *)kmem_alloc(size * sizeof (*table), kmflag);
405 	*tablep = table;
406 
407 	if (table == NULL)
408 		return (ENOMEM);
409 
410 	for (i = 0; i < size; i++) {
411 		mutex_init(&(table[i].iacqf_lock), NULL, MUTEX_DEFAULT, NULL);
412 		table[i].iacqf_ipsacq = NULL;
413 	}
414 
415 	return (0);
416 }
417 
418 /*
419  * Attempt to initialize an SADB instance.  On failure, return ENOMEM;
420  * caller must clean up partial allocations.
421  */
422 static int
423 sadb_init_trial(sadb_t *sp, uint_t size, int kmflag)
424 {
425 	ASSERT(sp->sdb_of == NULL);
426 	ASSERT(sp->sdb_if == NULL);
427 	ASSERT(sp->sdb_acq == NULL);
428 
429 	sp->sdb_hashsize = size;
430 	if (sadb_init_fanout(&sp->sdb_of, size, kmflag) != 0)
431 		return (ENOMEM);
432 	if (sadb_init_fanout(&sp->sdb_if, size, kmflag) != 0)
433 		return (ENOMEM);
434 	if (sadb_init_acfanout(&sp->sdb_acq, size, kmflag) != 0)
435 		return (ENOMEM);
436 
437 	return (0);
438 }
439 
440 /*
441  * Call me to initialize an SADB instance; fall back to default size on failure.
442  */
443 static void
444 sadb_init(const char *name, sadb_t *sp, uint_t size, uint_t ver,
445     netstack_t *ns)
446 {
447 	ASSERT(sp->sdb_of == NULL);
448 	ASSERT(sp->sdb_if == NULL);
449 	ASSERT(sp->sdb_acq == NULL);
450 
451 	if (size < IPSEC_DEFAULT_HASH_SIZE)
452 		size = IPSEC_DEFAULT_HASH_SIZE;
453 
454 	if (sadb_init_trial(sp, size, KM_NOSLEEP) != 0) {
455 
456 		cmn_err(CE_WARN,
457 		    "Unable to allocate %u entry IPv%u %s SADB hash table",
458 		    size, ver, name);
459 
460 		sadb_destroy(sp, ns);
461 		size = IPSEC_DEFAULT_HASH_SIZE;
462 		cmn_err(CE_WARN, "Falling back to %d entries", size);
463 		(void) sadb_init_trial(sp, size, KM_SLEEP);
464 	}
465 }
466 
467 
468 /*
469  * Initialize an SADB-pair.
470  */
471 void
472 sadbp_init(const char *name, sadbp_t *sp, int type, int size, netstack_t *ns)
473 {
474 	sadb_init(name, &sp->s_v4, size, 4, ns);
475 	sadb_init(name, &sp->s_v6, size, 6, ns);
476 
477 	sp->s_satype = type;
478 
479 	ASSERT((type == SADB_SATYPE_AH) || (type == SADB_SATYPE_ESP));
480 	if (type == SADB_SATYPE_AH) {
481 		ipsec_stack_t	*ipss = ns->netstack_ipsec;
482 
483 		ip_drop_register(&ipss->ipsec_sadb_dropper, "IPsec SADB");
484 	}
485 }
486 
487 /*
488  * Deliver a single SADB_DUMP message representing a single SA.  This is
489  * called many times by sadb_dump().
490  *
491  * If the return value of this is ENOBUFS (not the same as ENOMEM), then
492  * the caller should take that as a hint that dupb() on the "original answer"
493  * failed, and that perhaps the caller should try again with a copyb()ed
494  * "original answer".
495  */
496 static int
497 sadb_dump_deliver(queue_t *pfkey_q, mblk_t *original_answer, ipsa_t *ipsa,
498     sadb_msg_t *samsg)
499 {
500 	mblk_t *answer;
501 
502 	answer = dupb(original_answer);
503 	if (answer == NULL)
504 		return (ENOBUFS);
505 	answer->b_cont = sadb_sa2msg(ipsa, samsg);
506 	if (answer->b_cont == NULL) {
507 		freeb(answer);
508 		return (ENOMEM);
509 	}
510 
511 	/* Just do a putnext, and let keysock deal with flow control. */
512 	putnext(pfkey_q, answer);
513 	return (0);
514 }
515 
516 /*
517  * Common function to allocate and prepare a keysock_out_t M_CTL message.
518  */
519 mblk_t *
520 sadb_keysock_out(minor_t serial)
521 {
522 	mblk_t *mp;
523 	keysock_out_t *kso;
524 
525 	mp = allocb(sizeof (ipsec_info_t), BPRI_HI);
526 	if (mp != NULL) {
527 		mp->b_datap->db_type = M_CTL;
528 		mp->b_wptr += sizeof (ipsec_info_t);
529 		kso = (keysock_out_t *)mp->b_rptr;
530 		kso->ks_out_type = KEYSOCK_OUT;
531 		kso->ks_out_len = sizeof (*kso);
532 		kso->ks_out_serial = serial;
533 	}
534 
535 	return (mp);
536 }
537 
538 /*
539  * Perform an SADB_DUMP, spewing out every SA in an array of SA fanouts
540  * to keysock.
541  */
542 static int
543 sadb_dump_fanout(queue_t *pfkey_q, mblk_t *mp, minor_t serial, isaf_t *fanout,
544     int num_entries, boolean_t do_peers)
545 {
546 	int i, error = 0;
547 	mblk_t *original_answer;
548 	ipsa_t *walker;
549 	sadb_msg_t *samsg;
550 
551 	/*
552 	 * For each IPSA hash bucket do:
553 	 *	- Hold the mutex
554 	 *	- Walk each entry, doing an sadb_dump_deliver() on it.
555 	 */
556 	ASSERT(mp->b_cont != NULL);
557 	samsg = (sadb_msg_t *)mp->b_cont->b_rptr;
558 
559 	original_answer = sadb_keysock_out(serial);
560 	if (original_answer == NULL)
561 		return (ENOMEM);
562 
563 	for (i = 0; i < num_entries; i++) {
564 		mutex_enter(&fanout[i].isaf_lock);
565 		for (walker = fanout[i].isaf_ipsa; walker != NULL;
566 		    walker = walker->ipsa_next) {
567 			if (!do_peers && walker->ipsa_haspeer)
568 				continue;
569 			error = sadb_dump_deliver(pfkey_q, original_answer,
570 			    walker, samsg);
571 			if (error == ENOBUFS) {
572 				mblk_t *new_original_answer;
573 
574 				/* Ran out of dupb's.  Try a copyb. */
575 				new_original_answer = copyb(original_answer);
576 				if (new_original_answer == NULL) {
577 					error = ENOMEM;
578 				} else {
579 					freeb(original_answer);
580 					original_answer = new_original_answer;
581 					error = sadb_dump_deliver(pfkey_q,
582 					    original_answer, walker, samsg);
583 				}
584 			}
585 			if (error != 0)
586 				break;	/* out of for loop. */
587 		}
588 		mutex_exit(&fanout[i].isaf_lock);
589 		if (error != 0)
590 			break;	/* out of for loop. */
591 	}
592 
593 	freeb(original_answer);
594 	return (error);
595 }
596 
597 /*
598  * Dump an entire SADB; outbound first, then inbound.
599  */
600 
601 int
602 sadb_dump(queue_t *pfkey_q, mblk_t *mp, minor_t serial, sadb_t *sp)
603 {
604 	int error;
605 
606 	/* Dump outbound */
607 	error = sadb_dump_fanout(pfkey_q, mp, serial, sp->sdb_of,
608 	    sp->sdb_hashsize, B_TRUE);
609 	if (error)
610 		return (error);
611 
612 	/* Dump inbound */
613 	return sadb_dump_fanout(pfkey_q, mp, serial, sp->sdb_if,
614 	    sp->sdb_hashsize, B_FALSE);
615 }
616 
617 /*
618  * Generic sadb table walker.
619  *
620  * Call "walkfn" for each SA in each bucket in "table"; pass the
621  * bucket, the entry and "cookie" to the callback function.
622  * Take care to ensure that walkfn can delete the SA without screwing
623  * up our traverse.
624  *
625  * The bucket is locked for the duration of the callback, both so that the
626  * callback can just call sadb_unlinkassoc() when it wants to delete something,
627  * and so that no new entries are added while we're walking the list.
628  */
629 static void
630 sadb_walker(isaf_t *table, uint_t numentries,
631     void (*walkfn)(isaf_t *head, ipsa_t *entry, void *cookie),
632     void *cookie)
633 {
634 	int i;
635 	for (i = 0; i < numentries; i++) {
636 		ipsa_t *entry, *next;
637 
638 		mutex_enter(&table[i].isaf_lock);
639 
640 		for (entry = table[i].isaf_ipsa; entry != NULL;
641 		    entry = next) {
642 			next = entry->ipsa_next;
643 			(*walkfn)(&table[i], entry, cookie);
644 		}
645 		mutex_exit(&table[i].isaf_lock);
646 	}
647 }
648 
/*
 * From the given SA, construct a dl_ct_ipsec_key_t and a dl_ct_ipsec_t
 * to be sent to the adapter as part of a DL_CONTROL_REQ.
 *
 * Both structures are written at mp->b_wptr, key first and SA data
 * immediately after it, so mp must have room for both beyond its current
 * write pointer.  On success b_wptr is advanced past them.
 *
 * The is_inbound boolean indicates whether the specified
 * SA is part of an inbound SA table.
 *
 * The SA's lock must be held by the caller.
 *
 * Returns B_TRUE if the corresponding SA must be passed to
 * a provider, B_FALSE otherwise; frees mp if it returns B_FALSE.
 */
static boolean_t
sadb_req_from_sa(ipsa_t *sa, mblk_t *mp, boolean_t is_inbound)
{
	dl_ct_ipsec_key_t *keyp;
	dl_ct_ipsec_t *sap;
	void *ct_sa = mp->b_wptr;

	ASSERT(MUTEX_HELD(&sa->ipsa_lock));

	/* The key sits at the write pointer; the SA data follows it. */
	keyp = (dl_ct_ipsec_key_t *)(ct_sa);
	sap = (dl_ct_ipsec_t *)(keyp + 1);

	IPSECHW_DEBUG(IPSECHW_CAPAB, ("sadb_req_from_sa: "
	    "is_inbound = %d\n", is_inbound));

	/* initialize flag */
	sap->sadb_sa_flags = 0;
	if (is_inbound) {
		sap->sadb_sa_flags |= DL_CT_IPSEC_INBOUND;
		/*
		 * If an inbound SA has a peer, then mark it as being
		 * an outbound SA as well.
		 */
		if (sa->ipsa_haspeer)
			sap->sadb_sa_flags |= DL_CT_IPSEC_OUTBOUND;
	} else {
		/*
		 * If an outbound SA has a peer, then don't send it,
		 * since we will send the copy from the inbound table.
		 */
		if (sa->ipsa_haspeer) {
			freemsg(mp);
			return (B_FALSE);
		}
		sap->sadb_sa_flags |= DL_CT_IPSEC_OUTBOUND;
	}

	/* The key identifies the SA by SPI, destination and family. */
	keyp->dl_key_spi = sa->ipsa_spi;
	bcopy(sa->ipsa_dstaddr, keyp->dl_key_dest_addr,
	    DL_CTL_IPSEC_ADDR_LEN);
	keyp->dl_key_addr_family = sa->ipsa_addrfam;

	sap->sadb_sa_auth = sa->ipsa_auth_alg;
	sap->sadb_sa_encrypt = sa->ipsa_encr_alg;

	/*
	 * NOTE(review): the copy lengths below come straight from the SA's
	 * key lengths; presumably sadb_key_data_a/sadb_key_data_e are sized
	 * to hold any key an SA can carry -- confirm against dl_ct_ipsec_t.
	 */
	sap->sadb_key_len_a = sa->ipsa_authkeylen;
	sap->sadb_key_bits_a = sa->ipsa_authkeybits;
	bcopy(sa->ipsa_authkey,
	    sap->sadb_key_data_a, sap->sadb_key_len_a);

	sap->sadb_key_len_e = sa->ipsa_encrkeylen;
	sap->sadb_key_bits_e = sa->ipsa_encrkeybits;
	bcopy(sa->ipsa_encrkey,
	    sap->sadb_key_data_e, sap->sadb_key_len_e);

	/* Account for both structures written above. */
	mp->b_wptr += sizeof (dl_ct_ipsec_t) + sizeof (dl_ct_ipsec_key_t);
	return (B_TRUE);
}
723 
724 /*
725  * Called from AH or ESP to format a message which will be used to inform
726  * IPsec-acceleration-capable ills of a SADB change.
727  * (It is not possible to send the message to IP directly from this function
728  * since the SA, if any, is locked during the call).
729  *
730  * dl_operation: DL_CONTROL_REQ operation (add, delete, update, etc)
731  * sa_type: identifies whether the operation applies to AH or ESP
732  *	(must be one of SADB_SATYPE_AH or SADB_SATYPE_ESP)
733  * sa: Pointer to an SA.  Must be non-NULL and locked
734  *	for ADD, DELETE, GET, and UPDATE operations.
735  * This function returns an mblk chain that must be passed to IP
736  * for forwarding to the IPsec capable providers.
737  */
738 mblk_t *
739 sadb_fmt_sa_req(uint_t dl_operation, uint_t sa_type, ipsa_t *sa,
740     boolean_t is_inbound)
741 {
742 	mblk_t *mp;
743 	dl_control_req_t *ctrl;
744 	boolean_t need_key = B_FALSE;
745 	mblk_t *ctl_mp = NULL;
746 	ipsec_ctl_t *ctl;
747 
748 	/*
749 	 * 1 allocate and initialize DL_CONTROL_REQ M_PROTO
750 	 * 2 if a key is needed for the operation
751 	 *    2.1 initialize key
752 	 *    2.2 if a full SA is needed for the operation
753 	 *	2.2.1 initialize full SA info
754 	 * 3 return message; caller will call ill_ipsec_capab_send_all()
755 	 * to send the resulting message to IPsec capable ills.
756 	 */
757 
758 	ASSERT(sa_type == SADB_SATYPE_AH || sa_type == SADB_SATYPE_ESP);
759 
760 	/*
761 	 * Allocate DL_CONTROL_REQ M_PROTO
762 	 * We allocate room for the SA even if it's not needed
763 	 * by some of the operations (for example flush)
764 	 */
765 	mp = allocb(sizeof (dl_control_req_t) +
766 	    sizeof (dl_ct_ipsec_key_t) + sizeof (dl_ct_ipsec_t), BPRI_HI);
767 	if (mp == NULL)
768 		return (NULL);
769 	mp->b_datap->db_type = M_PROTO;
770 
771 	/* initialize dl_control_req_t */
772 	ctrl = (dl_control_req_t *)mp->b_wptr;
773 	ctrl->dl_primitive = DL_CONTROL_REQ;
774 	ctrl->dl_operation = dl_operation;
775 	ctrl->dl_type = sa_type == SADB_SATYPE_AH ? DL_CT_IPSEC_AH :
776 	    DL_CT_IPSEC_ESP;
777 	ctrl->dl_key_offset = sizeof (dl_control_req_t);
778 	ctrl->dl_key_length = sizeof (dl_ct_ipsec_key_t);
779 	ctrl->dl_data_offset = sizeof (dl_control_req_t) +
780 	    sizeof (dl_ct_ipsec_key_t);
781 	ctrl->dl_data_length = sizeof (dl_ct_ipsec_t);
782 	mp->b_wptr += sizeof (dl_control_req_t);
783 
784 	if ((dl_operation == DL_CO_SET) || (dl_operation == DL_CO_DELETE)) {
785 		ASSERT(sa != NULL);
786 		ASSERT(MUTEX_HELD(&sa->ipsa_lock));
787 
788 		need_key = B_TRUE;
789 
790 		/*
791 		 * Initialize key and SA data. Note that for some
792 		 * operations the SA data is ignored by the provider
793 		 * (delete, etc.)
794 		 */
795 		if (!sadb_req_from_sa(sa, mp, is_inbound))
796 			return (NULL);
797 	}
798 
799 	/* construct control message */
800 	ctl_mp = allocb(sizeof (ipsec_ctl_t), BPRI_HI);
801 	if (ctl_mp == NULL) {
802 		cmn_err(CE_WARN, "sadb_fmt_sa_req: allocb failed\n");
803 		freemsg(mp);
804 		return (NULL);
805 	}
806 
807 	ctl_mp->b_datap->db_type = M_CTL;
808 	ctl_mp->b_wptr += sizeof (ipsec_ctl_t);
809 	ctl_mp->b_cont = mp;
810 
811 	ctl = (ipsec_ctl_t *)ctl_mp->b_rptr;
812 	ctl->ipsec_ctl_type = IPSEC_CTL;
813 	ctl->ipsec_ctl_len  = sizeof (ipsec_ctl_t);
814 	ctl->ipsec_ctl_sa_type = sa_type;
815 
816 	if (need_key) {
817 		/*
818 		 * Keep an additional reference on SA, since it will be
819 		 * needed by IP to send control messages corresponding
820 		 * to that SA from its perimeter. IP will do a
821 		 * IPSA_REFRELE when done with the request.
822 		 */
823 		ASSERT(MUTEX_HELD(&sa->ipsa_lock));
824 		IPSA_REFHOLD(sa);
825 		ctl->ipsec_ctl_sa = sa;
826 	} else
827 		ctl->ipsec_ctl_sa = NULL;
828 
829 	return (ctl_mp);
830 }
831 
832 
/*
 * Called by sadb_ill_download() to dump the entries for a specific
 * fanout table.  For each SA entry in the table passed as argument,
 * use mp as a template and constructs a full DL_CONTROL message, and
 * call ill_dlpi_send(), provided by IP, to send the resulting
 * messages to the ill.
 *
 * Only SAs accepted by ipsec_capab_match() for this ill are considered.
 * mp itself is never sent or freed here; only copies of it are.  If a copy
 * cannot be allocated, the messages already built for the current bucket
 * are still sent and the walk then stops; no error is reported to the
 * caller.
 */
static void
sadb_ill_df(ill_t *ill, mblk_t *mp, isaf_t *fanout, int num_entries,
    boolean_t is_inbound)
{
	ipsa_t *walker;
	mblk_t *nmp, *salist;
	int i, error = 0;
	ip_stack_t	*ipst = ill->ill_ipst;
	netstack_t	*ns = ipst->ips_netstack;

	IPSECHW_DEBUG(IPSECHW_SADB, ("sadb_ill_df: fanout at 0x%p ne=%d\n",
	    (void *)fanout, num_entries));
	/*
	 * For each IPSA hash bucket do:
	 *	- Hold the mutex
	 *	- Walk each entry, sending a corresponding request to IP
	 *	  for it.
	 */
	ASSERT(mp->b_datap->db_type == M_PROTO);

	for (i = 0; i < num_entries; i++) {
		mutex_enter(&fanout[i].isaf_lock);
		/* Messages for this bucket, chained through b_next. */
		salist = NULL;

		for (walker = fanout[i].isaf_ipsa; walker != NULL;
		    walker = walker->ipsa_next) {
			IPSECHW_DEBUG(IPSECHW_SADB,
			    ("sadb_ill_df: sending SA to ill via IP \n"));
			/*
			 * Duplicate the template mp passed and
			 * complete DL_CONTROL_REQ data.
			 * To be more memory efficient, we could use
			 * dupb() for the M_CTL and copyb() for the M_PROTO
			 * as the M_CTL, since the M_CTL is the same for
			 * every SA entry passed down to IP for the same ill.
			 *
			 * Note that copymsg/copyb ensure that the new mblk
			 * is at least as large as the source mblk even if it's
			 * not using all its storage -- therefore, nmp
			 * has trailing space for sadb_req_from_sa to add
			 * the SA-specific bits.
			 */
			mutex_enter(&walker->ipsa_lock);
			if (ipsec_capab_match(ill,
			    ill->ill_phyint->phyint_ifindex, ill->ill_isv6,
			    walker, ns)) {
				nmp = copymsg(mp);
				if (nmp == NULL) {
					IPSECHW_DEBUG(IPSECHW_SADB,
					    ("sadb_ill_df: alloc error\n"));
					error = ENOMEM;
					mutex_exit(&walker->ipsa_lock);
					break;
				}
				/*
				 * On B_FALSE sadb_req_from_sa() has freed
				 * nmp, so there is nothing to queue.
				 */
				if (sadb_req_from_sa(walker, nmp, is_inbound)) {
					nmp->b_next = salist;
					salist = nmp;
				}
			}
			mutex_exit(&walker->ipsa_lock);
		}
		mutex_exit(&fanout[i].isaf_lock);
		/*
		 * Send what was collected only after the bucket lock has
		 * been dropped.  New messages were pushed onto the head of
		 * the list, so they go out in the reverse of walk order.
		 */
		while (salist != NULL) {
			nmp = salist;
			salist = nmp->b_next;
			nmp->b_next = NULL;
			ill_dlpi_send(ill, nmp);
		}
		if (error != 0)
			break;	/* out of for loop. */
	}
}
912 
913 /*
914  * Called by ill_ipsec_capab_add(). Sends a copy of the SADB of
915  * the type specified by sa_type to the specified ill.
916  *
917  * We call for each fanout table defined by the SADB (one per
918  * protocol). sadb_ill_df() finally calls ill_dlpi_send() for
919  * each SADB entry in order to send a corresponding DL_CONTROL_REQ
920  * message to the ill.
921  */
922 void
923 sadb_ill_download(ill_t *ill, uint_t sa_type)
924 {
925 	mblk_t *protomp;	/* prototype message */
926 	dl_control_req_t *ctrl;
927 	sadbp_t *spp;
928 	sadb_t *sp;
929 	int dlt;
930 	ip_stack_t	*ipst = ill->ill_ipst;
931 	netstack_t	*ns = ipst->ips_netstack;
932 
933 	ASSERT(sa_type == SADB_SATYPE_AH || sa_type == SADB_SATYPE_ESP);
934 
935 	/*
936 	 * Allocate and initialize prototype answer. A duplicate for
937 	 * each SA is sent down to the interface.
938 	 */
939 
940 	/* DL_CONTROL_REQ M_PROTO mblk_t */
941 	protomp = allocb(sizeof (dl_control_req_t) +
942 	    sizeof (dl_ct_ipsec_key_t) + sizeof (dl_ct_ipsec_t), BPRI_HI);
943 	if (protomp == NULL)
944 		return;
945 	protomp->b_datap->db_type = M_PROTO;
946 
947 	dlt = (sa_type == SADB_SATYPE_AH) ? DL_CT_IPSEC_AH : DL_CT_IPSEC_ESP;
948 	if (sa_type == SADB_SATYPE_ESP) {
949 		ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
950 
951 		spp = &espstack->esp_sadb;
952 	} else {
953 		ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
954 
955 		spp = &ahstack->ah_sadb;
956 	}
957 
958 	ctrl = (dl_control_req_t *)protomp->b_wptr;
959 	ctrl->dl_primitive = DL_CONTROL_REQ;
960 	ctrl->dl_operation = DL_CO_SET;
961 	ctrl->dl_type = dlt;
962 	ctrl->dl_key_offset = sizeof (dl_control_req_t);
963 	ctrl->dl_key_length = sizeof (dl_ct_ipsec_key_t);
964 	ctrl->dl_data_offset = sizeof (dl_control_req_t) +
965 	    sizeof (dl_ct_ipsec_key_t);
966 	ctrl->dl_data_length = sizeof (dl_ct_ipsec_t);
967 	protomp->b_wptr += sizeof (dl_control_req_t);
968 
969 	/*
970 	 * then for each SADB entry, we fill out the dl_ct_ipsec_key_t
971 	 * and dl_ct_ipsec_t
972 	 */
973 	sp = ill->ill_isv6 ? &(spp->s_v6) : &(spp->s_v4);
974 	sadb_ill_df(ill, protomp, sp->sdb_of, sp->sdb_hashsize, B_FALSE);
975 	sadb_ill_df(ill, protomp, sp->sdb_if, sp->sdb_hashsize, B_TRUE);
976 	freemsg(protomp);
977 }
978 
979 /*
980  * Call me to free up a security association fanout.  Use the forever
981  * variable to indicate freeing up the SAs (forever == B_FALSE, e.g.
982  * an SADB_FLUSH message), or destroying everything (forever == B_TRUE,
983  * when a module is unloaded).
984  */
985 static void
986 sadb_destroyer(isaf_t **tablep, uint_t numentries, boolean_t forever)
987 {
988 	int i;
989 	isaf_t *table = *tablep;
990 
991 	if (table == NULL)
992 		return;
993 
994 	for (i = 0; i < numentries; i++) {
995 		mutex_enter(&table[i].isaf_lock);
996 		while (table[i].isaf_ipsa != NULL)
997 			sadb_unlinkassoc(table[i].isaf_ipsa);
998 		table[i].isaf_gen++;
999 		mutex_exit(&table[i].isaf_lock);
1000 		if (forever)
1001 			mutex_destroy(&(table[i].isaf_lock));
1002 	}
1003 
1004 	if (forever) {
1005 		*tablep = NULL;
1006 		kmem_free(table, numentries * sizeof (*table));
1007 	}
1008 }
1009 
/*
 * Entry points to sadb_destroyer().
 */

/*
 * Remove every SA and every acquire record from one SADB instance while
 * keeping the tables themselves (and their bucket mutexes) in place.
 */
static void
sadb_flush(sadb_t *sp, netstack_t *ns)
{
	/*
	 * Flush out each bucket, one at a time.  Were it not for keysock's
	 * enforcement, there would be a subtlety where I could add on the
	 * heels of a flush.  With keysock's enforcement, however, this
	 * makes ESP's job easy.
	 */
	sadb_destroyer(&sp->sdb_of, sp->sdb_hashsize, B_FALSE);
	sadb_destroyer(&sp->sdb_if, sp->sdb_hashsize, B_FALSE);

	/* For each acquire, destroy it; leave the bucket mutex alone. */
	sadb_destroy_acqlist(&sp->sdb_acq, sp->sdb_hashsize, B_FALSE, ns);
}
1028 
1029 static void
1030 sadb_destroy(sadb_t *sp, netstack_t *ns)
1031 {
1032 	sadb_destroyer(&sp->sdb_of, sp->sdb_hashsize, B_TRUE);
1033 	sadb_destroyer(&sp->sdb_if, sp->sdb_hashsize, B_TRUE);
1034 
1035 	/* For each acquire, destroy it, including the bucket mutex. */
1036 	sadb_destroy_acqlist(&sp->sdb_acq, sp->sdb_hashsize, B_TRUE, ns);
1037 
1038 	ASSERT(sp->sdb_of == NULL);
1039 	ASSERT(sp->sdb_if == NULL);
1040 	ASSERT(sp->sdb_acq == NULL);
1041 }
1042 
1043 static void
1044 sadb_send_flush_req(sadbp_t *spp)
1045 {
1046 	mblk_t *ctl_mp;
1047 
1048 	/*
1049 	 * we've been unplumbed, or never were plumbed; don't go there.
1050 	 */
1051 	if (spp->s_ip_q == NULL)
1052 		return;
1053 
1054 	/* have IP send a flush msg to the IPsec accelerators */
1055 	ctl_mp = sadb_fmt_sa_req(DL_CO_FLUSH, spp->s_satype, NULL, B_TRUE);
1056 	if (ctl_mp != NULL)
1057 		putnext(spp->s_ip_q, ctl_mp);
1058 }
1059 
1060 void
1061 sadbp_flush(sadbp_t *spp, netstack_t *ns)
1062 {
1063 	sadb_flush(&spp->s_v4, ns);
1064 	sadb_flush(&spp->s_v6, ns);
1065 
1066 	sadb_send_flush_req(spp);
1067 }
1068 
1069 void
1070 sadbp_destroy(sadbp_t *spp, netstack_t *ns)
1071 {
1072 	sadb_destroy(&spp->s_v4, ns);
1073 	sadb_destroy(&spp->s_v6, ns);
1074 
1075 	sadb_send_flush_req(spp);
1076 	if (spp->s_satype == SADB_SATYPE_AH) {
1077 		ipsec_stack_t	*ipss = ns->netstack_ipsec;
1078 
1079 		ip_drop_unregister(&ipss->ipsec_sadb_dropper);
1080 	}
1081 }
1082 
1083 
1084 /*
1085  * Check hard vs. soft lifetimes.  If there's a reality mismatch (e.g.
1086  * soft lifetimes > hard lifetimes) return an appropriate diagnostic for
1087  * EINVAL.
1088  */
1089 int
1090 sadb_hardsoftchk(sadb_lifetime_t *hard, sadb_lifetime_t *soft)
1091 {
1092 	if (hard == NULL || soft == NULL)
1093 		return (0);
1094 
1095 	if (hard->sadb_lifetime_allocations != 0 &&
1096 	    soft->sadb_lifetime_allocations != 0 &&
1097 	    hard->sadb_lifetime_allocations < soft->sadb_lifetime_allocations)
1098 		return (SADB_X_DIAGNOSTIC_ALLOC_HSERR);
1099 
1100 	if (hard->sadb_lifetime_bytes != 0 &&
1101 	    soft->sadb_lifetime_bytes != 0 &&
1102 	    hard->sadb_lifetime_bytes < soft->sadb_lifetime_bytes)
1103 		return (SADB_X_DIAGNOSTIC_BYTES_HSERR);
1104 
1105 	if (hard->sadb_lifetime_addtime != 0 &&
1106 	    soft->sadb_lifetime_addtime != 0 &&
1107 	    hard->sadb_lifetime_addtime < soft->sadb_lifetime_addtime)
1108 		return (SADB_X_DIAGNOSTIC_ADDTIME_HSERR);
1109 
1110 	if (hard->sadb_lifetime_usetime != 0 &&
1111 	    soft->sadb_lifetime_usetime != 0 &&
1112 	    hard->sadb_lifetime_usetime < soft->sadb_lifetime_usetime)
1113 		return (SADB_X_DIAGNOSTIC_USETIME_HSERR);
1114 
1115 	return (0);
1116 }
1117 
1118 /*
1119  * Clone a security association for the purposes of inserting a single SA
1120  * into inbound and outbound tables respectively.
1121  */
1122 static ipsa_t *
1123 sadb_cloneassoc(ipsa_t *ipsa)
1124 {
1125 	ipsa_t *newbie;
1126 	boolean_t error = B_FALSE;
1127 
1128 	ASSERT(!MUTEX_HELD(&(ipsa->ipsa_lock)));
1129 
1130 	newbie = kmem_alloc(sizeof (ipsa_t), KM_NOSLEEP);
1131 	if (newbie == NULL)
1132 		return (NULL);
1133 
1134 	/* Copy over what we can. */
1135 	*newbie = *ipsa;
1136 
1137 	/* bzero and initialize locks, in case *_init() allocates... */
1138 	mutex_init(&newbie->ipsa_lock, NULL, MUTEX_DEFAULT, NULL);
1139 
1140 	/*
1141 	 * While somewhat dain-bramaged, the most graceful way to
1142 	 * recover from errors is to keep plowing through the
1143 	 * allocations, and getting what I can.  It's easier to call
1144 	 * sadb_freeassoc() on the stillborn clone when all the
1145 	 * pointers aren't pointing to the parent's data.
1146 	 */
1147 
1148 	if (ipsa->ipsa_authkey != NULL) {
1149 		newbie->ipsa_authkey = kmem_alloc(newbie->ipsa_authkeylen,
1150 		    KM_NOSLEEP);
1151 		if (newbie->ipsa_authkey == NULL) {
1152 			error = B_TRUE;
1153 		} else {
1154 			bcopy(ipsa->ipsa_authkey, newbie->ipsa_authkey,
1155 			    newbie->ipsa_authkeylen);
1156 
1157 			newbie->ipsa_kcfauthkey.ck_data =
1158 			    newbie->ipsa_authkey;
1159 		}
1160 
1161 		if (newbie->ipsa_amech.cm_param != NULL) {
1162 			newbie->ipsa_amech.cm_param =
1163 			    (char *)&newbie->ipsa_mac_len;
1164 		}
1165 	}
1166 
1167 	if (ipsa->ipsa_encrkey != NULL) {
1168 		newbie->ipsa_encrkey = kmem_alloc(newbie->ipsa_encrkeylen,
1169 		    KM_NOSLEEP);
1170 		if (newbie->ipsa_encrkey == NULL) {
1171 			error = B_TRUE;
1172 		} else {
1173 			bcopy(ipsa->ipsa_encrkey, newbie->ipsa_encrkey,
1174 			    newbie->ipsa_encrkeylen);
1175 
1176 			newbie->ipsa_kcfencrkey.ck_data =
1177 			    newbie->ipsa_encrkey;
1178 		}
1179 	}
1180 
1181 	newbie->ipsa_authtmpl = NULL;
1182 	newbie->ipsa_encrtmpl = NULL;
1183 
1184 	if (ipsa->ipsa_integ != NULL) {
1185 		newbie->ipsa_integ = kmem_alloc(newbie->ipsa_integlen,
1186 		    KM_NOSLEEP);
1187 		if (newbie->ipsa_integ == NULL) {
1188 			error = B_TRUE;
1189 		} else {
1190 			bcopy(ipsa->ipsa_integ, newbie->ipsa_integ,
1191 			    newbie->ipsa_integlen);
1192 		}
1193 	}
1194 
1195 	if (ipsa->ipsa_sens != NULL) {
1196 		newbie->ipsa_sens = kmem_alloc(newbie->ipsa_senslen,
1197 		    KM_NOSLEEP);
1198 		if (newbie->ipsa_sens == NULL) {
1199 			error = B_TRUE;
1200 		} else {
1201 			bcopy(ipsa->ipsa_sens, newbie->ipsa_sens,
1202 			    newbie->ipsa_senslen);
1203 		}
1204 	}
1205 
1206 	if (ipsa->ipsa_src_cid != NULL) {
1207 		newbie->ipsa_src_cid = ipsa->ipsa_src_cid;
1208 		IPSID_REFHOLD(ipsa->ipsa_src_cid);
1209 	}
1210 
1211 	if (ipsa->ipsa_dst_cid != NULL) {
1212 		newbie->ipsa_dst_cid = ipsa->ipsa_dst_cid;
1213 		IPSID_REFHOLD(ipsa->ipsa_dst_cid);
1214 	}
1215 
1216 	if (error) {
1217 		sadb_freeassoc(newbie);
1218 		return (NULL);
1219 	}
1220 
1221 	return (newbie);
1222 }
1223 
1224 /*
1225  * Initialize a SADB address extension at the address specified by addrext.
1226  * Return a pointer to the end of the new address extension.
1227  */
1228 static uint8_t *
1229 sadb_make_addr_ext(uint8_t *start, uint8_t *end, uint16_t exttype,
1230     sa_family_t af, uint32_t *addr, uint16_t port, uint8_t proto, int prefix)
1231 {
1232 	struct sockaddr_in *sin;
1233 	struct sockaddr_in6 *sin6;
1234 	uint8_t *cur = start;
1235 	int addrext_len;
1236 	int sin_len;
1237 	sadb_address_t *addrext	= (sadb_address_t *)cur;
1238 
1239 	if (cur == NULL)
1240 		return (NULL);
1241 
1242 	cur += sizeof (*addrext);
1243 	if (cur > end)
1244 		return (NULL);
1245 
1246 	addrext->sadb_address_proto = proto;
1247 	addrext->sadb_address_prefixlen = prefix;
1248 	addrext->sadb_address_reserved = 0;
1249 	addrext->sadb_address_exttype = exttype;
1250 
1251 	switch (af) {
1252 	case AF_INET:
1253 		sin = (struct sockaddr_in *)cur;
1254 		sin_len = sizeof (*sin);
1255 		cur += sin_len;
1256 		if (cur > end)
1257 			return (NULL);
1258 
1259 		sin->sin_family = af;
1260 		bzero(sin->sin_zero, sizeof (sin->sin_zero));
1261 		sin->sin_port = port;
1262 		IPSA_COPY_ADDR(&sin->sin_addr, addr, af);
1263 		break;
1264 	case AF_INET6:
1265 		sin6 = (struct sockaddr_in6 *)cur;
1266 		sin_len = sizeof (*sin6);
1267 		cur += sin_len;
1268 		if (cur > end)
1269 			return (NULL);
1270 
1271 		bzero(sin6, sizeof (*sin6));
1272 		sin6->sin6_family = af;
1273 		sin6->sin6_port = port;
1274 		IPSA_COPY_ADDR(&sin6->sin6_addr, addr, af);
1275 		break;
1276 	}
1277 
1278 	addrext_len = roundup(cur - start, sizeof (uint64_t));
1279 	addrext->sadb_address_len = SADB_8TO64(addrext_len);
1280 
1281 	cur = start + addrext_len;
1282 	if (cur > end)
1283 		cur = NULL;
1284 
1285 	return (cur);
1286 }
1287 
1288 /*
1289  * Construct a key management cookie extension.
1290  */
1291 
1292 static uint8_t *
1293 sadb_make_kmc_ext(uint8_t *cur, uint8_t *end, uint32_t kmp, uint32_t kmc)
1294 {
1295 	sadb_x_kmc_t *kmcext = (sadb_x_kmc_t *)cur;
1296 
1297 	if (cur == NULL)
1298 		return (NULL);
1299 
1300 	cur += sizeof (*kmcext);
1301 
1302 	if (cur > end)
1303 		return (NULL);
1304 
1305 	kmcext->sadb_x_kmc_len = SADB_8TO64(sizeof (*kmcext));
1306 	kmcext->sadb_x_kmc_exttype = SADB_X_EXT_KM_COOKIE;
1307 	kmcext->sadb_x_kmc_proto = kmp;
1308 	kmcext->sadb_x_kmc_cookie = kmc;
1309 	kmcext->sadb_x_kmc_reserved = 0;
1310 
1311 	return (cur);
1312 }
1313 
1314 /*
1315  * Given an original message header with sufficient space following it, and an
1316  * SA, construct a full PF_KEY message with all of the relevant extensions.
1317  * This is mostly used for SADB_GET, and SADB_DUMP.
1318  */
1319 static mblk_t *
1320 sadb_sa2msg(ipsa_t *ipsa, sadb_msg_t *samsg)
1321 {
1322 	int alloclen, addrsize, paddrsize, authsize, encrsize;
1323 	int srcidsize, dstidsize;
1324 	sa_family_t fam, pfam;	/* Address family for SADB_EXT_ADDRESS */
1325 				/* src/dst and proxy sockaddrs. */
1326 	/*
1327 	 * The following are pointers into the PF_KEY message this PF_KEY
1328 	 * message creates.
1329 	 */
1330 	sadb_msg_t *newsamsg;
1331 	sadb_sa_t *assoc;
1332 	sadb_lifetime_t *lt;
1333 	sadb_key_t *key;
1334 	sadb_ident_t *ident;
1335 	sadb_sens_t *sens;
1336 	sadb_ext_t *walker;	/* For when we need a generic ext. pointer. */
1337 	mblk_t *mp;
1338 	uint64_t *bitmap;
1339 	uint8_t *cur, *end;
1340 	/* These indicate the presence of the above extension fields. */
1341 	boolean_t soft, hard, isrc, idst, auth, encr, sensinteg, srcid, dstid;
1342 
1343 	/* First off, figure out the allocation length for this message. */
1344 
1345 	/*
1346 	 * Constant stuff.  This includes base, SA, address (src, dst),
1347 	 * and lifetime (current).
1348 	 */
1349 	alloclen = sizeof (sadb_msg_t) + sizeof (sadb_sa_t) +
1350 	    sizeof (sadb_lifetime_t);
1351 
1352 	fam = ipsa->ipsa_addrfam;
1353 	switch (fam) {
1354 	case AF_INET:
1355 		addrsize = roundup(sizeof (struct sockaddr_in) +
1356 		    sizeof (sadb_address_t), sizeof (uint64_t));
1357 		break;
1358 	case AF_INET6:
1359 		addrsize = roundup(sizeof (struct sockaddr_in6) +
1360 		    sizeof (sadb_address_t), sizeof (uint64_t));
1361 		break;
1362 	default:
1363 		return (NULL);
1364 	}
1365 	/*
1366 	 * Allocate TWO address extensions, for source and destination.
1367 	 * (Thus, the * 2.)
1368 	 */
1369 	alloclen += addrsize * 2;
1370 	if (ipsa->ipsa_flags & IPSA_F_NATT_REM)
1371 		alloclen += addrsize;
1372 	if (ipsa->ipsa_flags & IPSA_F_NATT_LOC)
1373 		alloclen += addrsize;
1374 
1375 
1376 	/* How 'bout other lifetimes? */
1377 	if (ipsa->ipsa_softaddlt != 0 || ipsa->ipsa_softuselt != 0 ||
1378 	    ipsa->ipsa_softbyteslt != 0 || ipsa->ipsa_softalloc != 0) {
1379 		alloclen += sizeof (sadb_lifetime_t);
1380 		soft = B_TRUE;
1381 	} else {
1382 		soft = B_FALSE;
1383 	}
1384 
1385 	if (ipsa->ipsa_hardaddlt != 0 || ipsa->ipsa_harduselt != 0 ||
1386 	    ipsa->ipsa_hardbyteslt != 0 || ipsa->ipsa_hardalloc != 0) {
1387 		alloclen += sizeof (sadb_lifetime_t);
1388 		hard = B_TRUE;
1389 	} else {
1390 		hard = B_FALSE;
1391 	}
1392 
1393 	/* Inner addresses. */
1394 	if (ipsa->ipsa_innerfam == 0) {
1395 		isrc = B_FALSE;
1396 		idst = B_FALSE;
1397 	} else {
1398 		pfam = ipsa->ipsa_innerfam;
1399 		switch (pfam) {
1400 		case AF_INET6:
1401 			paddrsize = roundup(sizeof (struct sockaddr_in6) +
1402 			    sizeof (sadb_address_t), sizeof (uint64_t));
1403 			break;
1404 		case AF_INET:
1405 			paddrsize = roundup(sizeof (struct sockaddr_in) +
1406 			    sizeof (sadb_address_t), sizeof (uint64_t));
1407 			break;
1408 		default:
1409 			cmn_err(CE_PANIC,
1410 			    "IPsec SADB: Proxy length failure.\n");
1411 			break;
1412 		}
1413 		isrc = B_TRUE;
1414 		idst = B_TRUE;
1415 		alloclen += 2 * paddrsize;
1416 	}
1417 
1418 	/* For the following fields, assume that length != 0 ==> stuff */
1419 	if (ipsa->ipsa_authkeylen != 0) {
1420 		authsize = roundup(sizeof (sadb_key_t) + ipsa->ipsa_authkeylen,
1421 		    sizeof (uint64_t));
1422 		alloclen += authsize;
1423 		auth = B_TRUE;
1424 	} else {
1425 		auth = B_FALSE;
1426 	}
1427 
1428 	if (ipsa->ipsa_encrkeylen != 0) {
1429 		encrsize = roundup(sizeof (sadb_key_t) + ipsa->ipsa_encrkeylen,
1430 		    sizeof (uint64_t));
1431 		alloclen += encrsize;
1432 		encr = B_TRUE;
1433 	} else {
1434 		encr = B_FALSE;
1435 	}
1436 
1437 	/* No need for roundup on sens and integ. */
1438 	if (ipsa->ipsa_integlen != 0 || ipsa->ipsa_senslen != 0) {
1439 		alloclen += sizeof (sadb_key_t) + ipsa->ipsa_integlen +
1440 		    ipsa->ipsa_senslen;
1441 		sensinteg = B_TRUE;
1442 	} else {
1443 		sensinteg = B_FALSE;
1444 	}
1445 
1446 	/*
1447 	 * Must use strlen() here for lengths.	Identities use NULL
1448 	 * pointers to indicate their nonexistence.
1449 	 */
1450 	if (ipsa->ipsa_src_cid != NULL) {
1451 		srcidsize = roundup(sizeof (sadb_ident_t) +
1452 		    strlen(ipsa->ipsa_src_cid->ipsid_cid) + 1,
1453 		    sizeof (uint64_t));
1454 		alloclen += srcidsize;
1455 		srcid = B_TRUE;
1456 	} else {
1457 		srcid = B_FALSE;
1458 	}
1459 
1460 	if (ipsa->ipsa_dst_cid != NULL) {
1461 		dstidsize = roundup(sizeof (sadb_ident_t) +
1462 		    strlen(ipsa->ipsa_dst_cid->ipsid_cid) + 1,
1463 		    sizeof (uint64_t));
1464 		alloclen += dstidsize;
1465 		dstid = B_TRUE;
1466 	} else {
1467 		dstid = B_FALSE;
1468 	}
1469 
1470 	if ((ipsa->ipsa_kmp != 0) || (ipsa->ipsa_kmc != 0))
1471 		alloclen += sizeof (sadb_x_kmc_t);
1472 
1473 	/* Make sure the allocation length is a multiple of 8 bytes. */
1474 	ASSERT((alloclen & 0x7) == 0);
1475 
1476 	/* XXX Possibly make it esballoc, with a bzero-ing free_ftn. */
1477 	mp = allocb(alloclen, BPRI_HI);
1478 	if (mp == NULL)
1479 		return (NULL);
1480 
1481 	mp->b_wptr += alloclen;
1482 	end = mp->b_wptr;
1483 	newsamsg = (sadb_msg_t *)mp->b_rptr;
1484 	*newsamsg = *samsg;
1485 	newsamsg->sadb_msg_len = (uint16_t)SADB_8TO64(alloclen);
1486 
1487 	mutex_enter(&ipsa->ipsa_lock);	/* Since I'm grabbing SA fields... */
1488 
1489 	newsamsg->sadb_msg_satype = ipsa->ipsa_type;
1490 
1491 	assoc = (sadb_sa_t *)(newsamsg + 1);
1492 	assoc->sadb_sa_len = SADB_8TO64(sizeof (*assoc));
1493 	assoc->sadb_sa_exttype = SADB_EXT_SA;
1494 	assoc->sadb_sa_spi = ipsa->ipsa_spi;
1495 	assoc->sadb_sa_replay = ipsa->ipsa_replay_wsize;
1496 	assoc->sadb_sa_state = ipsa->ipsa_state;
1497 	assoc->sadb_sa_auth = ipsa->ipsa_auth_alg;
1498 	assoc->sadb_sa_encrypt = ipsa->ipsa_encr_alg;
1499 	assoc->sadb_sa_flags = ipsa->ipsa_flags;
1500 
1501 	lt = (sadb_lifetime_t *)(assoc + 1);
1502 	lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
1503 	lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT;
1504 	/* We do not support the concept. */
1505 	lt->sadb_lifetime_allocations = 0;
1506 	lt->sadb_lifetime_bytes = ipsa->ipsa_bytes;
1507 	lt->sadb_lifetime_addtime = ipsa->ipsa_addtime;
1508 	lt->sadb_lifetime_usetime = ipsa->ipsa_usetime;
1509 
1510 	if (hard) {
1511 		lt++;
1512 		lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
1513 		lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_HARD;
1514 		lt->sadb_lifetime_allocations = ipsa->ipsa_hardalloc;
1515 		lt->sadb_lifetime_bytes = ipsa->ipsa_hardbyteslt;
1516 		lt->sadb_lifetime_addtime = ipsa->ipsa_hardaddlt;
1517 		lt->sadb_lifetime_usetime = ipsa->ipsa_harduselt;
1518 	}
1519 
1520 	if (soft) {
1521 		lt++;
1522 		lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
1523 		lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_SOFT;
1524 		lt->sadb_lifetime_allocations = ipsa->ipsa_softalloc;
1525 		lt->sadb_lifetime_bytes = ipsa->ipsa_softbyteslt;
1526 		lt->sadb_lifetime_addtime = ipsa->ipsa_softaddlt;
1527 		lt->sadb_lifetime_usetime = ipsa->ipsa_softuselt;
1528 	}
1529 
1530 	cur = (uint8_t *)(lt + 1);
1531 
1532 	/* NOTE:  Don't fill in ports here if we are a tunnel-mode SA. */
1533 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_SRC, fam,
1534 	    ipsa->ipsa_srcaddr, (!isrc && !idst) ? SA_SRCPORT(ipsa) : 0,
1535 	    SA_PROTO(ipsa), 0);
1536 	if (cur == NULL) {
1537 		freemsg(mp);
1538 		mp = NULL;
1539 		goto bail;
1540 	}
1541 
1542 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_DST, fam,
1543 	    ipsa->ipsa_dstaddr, (!isrc && !idst) ? SA_DSTPORT(ipsa) : 0,
1544 	    SA_PROTO(ipsa), 0);
1545 	if (cur == NULL) {
1546 		freemsg(mp);
1547 		mp = NULL;
1548 		goto bail;
1549 	}
1550 
1551 	if (ipsa->ipsa_flags & IPSA_F_NATT_LOC) {
1552 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_NATT_LOC,
1553 		    fam, &ipsa->ipsa_natt_addr_loc, ipsa->ipsa_local_nat_port,
1554 		    IPPROTO_UDP, 0);
1555 		if (cur == NULL) {
1556 			freemsg(mp);
1557 			mp = NULL;
1558 			goto bail;
1559 		}
1560 	}
1561 
1562 	if (ipsa->ipsa_flags & IPSA_F_NATT_REM) {
1563 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_NATT_REM,
1564 		    fam, &ipsa->ipsa_natt_addr_rem, ipsa->ipsa_remote_nat_port,
1565 		    IPPROTO_UDP, 0);
1566 		if (cur == NULL) {
1567 			freemsg(mp);
1568 			mp = NULL;
1569 			goto bail;
1570 		}
1571 	}
1572 
1573 	/* If we are a tunnel-mode SA, fill in the inner-selectors. */
1574 	if (isrc) {
1575 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_SRC,
1576 		    pfam, ipsa->ipsa_innersrc, SA_SRCPORT(ipsa),
1577 		    SA_IPROTO(ipsa), ipsa->ipsa_innersrcpfx);
1578 		if (cur == NULL) {
1579 			freemsg(mp);
1580 			mp = NULL;
1581 			goto bail;
1582 		}
1583 	}
1584 
1585 	if (idst) {
1586 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_DST,
1587 		    pfam, ipsa->ipsa_innerdst, SA_DSTPORT(ipsa),
1588 		    SA_IPROTO(ipsa), ipsa->ipsa_innerdstpfx);
1589 		if (cur == NULL) {
1590 			freemsg(mp);
1591 			mp = NULL;
1592 			goto bail;
1593 		}
1594 	}
1595 
1596 	if ((ipsa->ipsa_kmp != 0) || (ipsa->ipsa_kmc != 0)) {
1597 		cur = sadb_make_kmc_ext(cur, end,
1598 		    ipsa->ipsa_kmp, ipsa->ipsa_kmc);
1599 		if (cur == NULL) {
1600 			freemsg(mp);
1601 			mp = NULL;
1602 			goto bail;
1603 		}
1604 	}
1605 
1606 	walker = (sadb_ext_t *)cur;
1607 	if (auth) {
1608 		key = (sadb_key_t *)walker;
1609 		key->sadb_key_len = SADB_8TO64(authsize);
1610 		key->sadb_key_exttype = SADB_EXT_KEY_AUTH;
1611 		key->sadb_key_bits = ipsa->ipsa_authkeybits;
1612 		key->sadb_key_reserved = 0;
1613 		bcopy(ipsa->ipsa_authkey, key + 1, ipsa->ipsa_authkeylen);
1614 		walker = (sadb_ext_t *)((uint64_t *)walker +
1615 		    walker->sadb_ext_len);
1616 	}
1617 
1618 	if (encr) {
1619 		key = (sadb_key_t *)walker;
1620 		key->sadb_key_len = SADB_8TO64(encrsize);
1621 		key->sadb_key_exttype = SADB_EXT_KEY_ENCRYPT;
1622 		key->sadb_key_bits = ipsa->ipsa_encrkeybits;
1623 		key->sadb_key_reserved = 0;
1624 		bcopy(ipsa->ipsa_encrkey, key + 1, ipsa->ipsa_encrkeylen);
1625 		walker = (sadb_ext_t *)((uint64_t *)walker +
1626 		    walker->sadb_ext_len);
1627 	}
1628 
1629 	if (srcid) {
1630 		ident = (sadb_ident_t *)walker;
1631 		ident->sadb_ident_len = SADB_8TO64(srcidsize);
1632 		ident->sadb_ident_exttype = SADB_EXT_IDENTITY_SRC;
1633 		ident->sadb_ident_type = ipsa->ipsa_src_cid->ipsid_type;
1634 		ident->sadb_ident_id = 0;
1635 		ident->sadb_ident_reserved = 0;
1636 		(void) strcpy((char *)(ident + 1),
1637 		    ipsa->ipsa_src_cid->ipsid_cid);
1638 		walker = (sadb_ext_t *)((uint64_t *)walker +
1639 		    walker->sadb_ext_len);
1640 	}
1641 
1642 	if (dstid) {
1643 		ident = (sadb_ident_t *)walker;
1644 		ident->sadb_ident_len = SADB_8TO64(dstidsize);
1645 		ident->sadb_ident_exttype = SADB_EXT_IDENTITY_DST;
1646 		ident->sadb_ident_type = ipsa->ipsa_dst_cid->ipsid_type;
1647 		ident->sadb_ident_id = 0;
1648 		ident->sadb_ident_reserved = 0;
1649 		(void) strcpy((char *)(ident + 1),
1650 		    ipsa->ipsa_dst_cid->ipsid_cid);
1651 		walker = (sadb_ext_t *)((uint64_t *)walker +
1652 		    walker->sadb_ext_len);
1653 	}
1654 
1655 	if (sensinteg) {
1656 		sens = (sadb_sens_t *)walker;
1657 		sens->sadb_sens_len = SADB_8TO64(sizeof (sadb_sens_t *) +
1658 		    ipsa->ipsa_senslen + ipsa->ipsa_integlen);
1659 		sens->sadb_sens_dpd = ipsa->ipsa_dpd;
1660 		sens->sadb_sens_sens_level = ipsa->ipsa_senslevel;
1661 		sens->sadb_sens_integ_level = ipsa->ipsa_integlevel;
1662 		sens->sadb_sens_sens_len = SADB_8TO64(ipsa->ipsa_senslen);
1663 		sens->sadb_sens_integ_len = SADB_8TO64(ipsa->ipsa_integlen);
1664 		sens->sadb_sens_reserved = 0;
1665 		bitmap = (uint64_t *)(sens + 1);
1666 		if (ipsa->ipsa_sens != NULL) {
1667 			bcopy(ipsa->ipsa_sens, bitmap, ipsa->ipsa_senslen);
1668 			bitmap += sens->sadb_sens_sens_len;
1669 		}
1670 		if (ipsa->ipsa_integ != NULL)
1671 			bcopy(ipsa->ipsa_integ, bitmap, ipsa->ipsa_integlen);
1672 		walker = (sadb_ext_t *)((uint64_t *)walker +
1673 		    walker->sadb_ext_len);
1674 	}
1675 
1676 bail:
1677 	/* Pardon any delays... */
1678 	mutex_exit(&ipsa->ipsa_lock);
1679 
1680 	return (mp);
1681 }
1682 
1683 /*
1684  * Strip out key headers or unmarked headers (SADB_EXT_KEY_*, SADB_EXT_UNKNOWN)
1685  * and adjust base message accordingly.
1686  *
1687  * Assume message is pulled up in one piece of contiguous memory.
1688  *
1689  * Say if we start off with:
1690  *
1691  * +------+----+-------------+-----------+---------------+---------------+
1692  * | base | SA | source addr | dest addr | rsrvd. or key | soft lifetime |
1693  * +------+----+-------------+-----------+---------------+---------------+
1694  *
1695  * we will end up with
1696  *
1697  * +------+----+-------------+-----------+---------------+
1698  * | base | SA | source addr | dest addr | soft lifetime |
1699  * +------+----+-------------+-----------+---------------+
1700  */
1701 static void
1702 sadb_strip(sadb_msg_t *samsg)
1703 {
1704 	sadb_ext_t *ext;
1705 	uint8_t *target = NULL;
1706 	uint8_t *msgend;
1707 	int sofar = SADB_8TO64(sizeof (*samsg));
1708 	int copylen;
1709 
1710 	ext = (sadb_ext_t *)(samsg + 1);
1711 	msgend = (uint8_t *)samsg;
1712 	msgend += SADB_64TO8(samsg->sadb_msg_len);
1713 	while ((uint8_t *)ext < msgend) {
1714 		if (ext->sadb_ext_type == SADB_EXT_RESERVED ||
1715 		    ext->sadb_ext_type == SADB_EXT_KEY_AUTH ||
1716 		    ext->sadb_ext_type == SADB_EXT_KEY_ENCRYPT) {
1717 			/*
1718 			 * Aha!	 I found a header to be erased.
1719 			 */
1720 
1721 			if (target != NULL) {
1722 				/*
1723 				 * If I had a previous header to be erased,
1724 				 * copy over it.  I can get away with just
1725 				 * copying backwards because the target will
1726 				 * always be 8 bytes behind the source.
1727 				 */
1728 				copylen = ((uint8_t *)ext) - (target +
1729 				    SADB_64TO8(
1730 				    ((sadb_ext_t *)target)->sadb_ext_len));
1731 				ovbcopy(((uint8_t *)ext - copylen), target,
1732 				    copylen);
1733 				target += copylen;
1734 				((sadb_ext_t *)target)->sadb_ext_len =
1735 				    SADB_8TO64(((uint8_t *)ext) - target +
1736 				    SADB_64TO8(ext->sadb_ext_len));
1737 			} else {
1738 				target = (uint8_t *)ext;
1739 			}
1740 		} else {
1741 			sofar += ext->sadb_ext_len;
1742 		}
1743 
1744 		ext = (sadb_ext_t *)(((uint64_t *)ext) + ext->sadb_ext_len);
1745 	}
1746 
1747 	ASSERT((uint8_t *)ext == msgend);
1748 
1749 	if (target != NULL) {
1750 		copylen = ((uint8_t *)ext) - (target +
1751 		    SADB_64TO8(((sadb_ext_t *)target)->sadb_ext_len));
1752 		if (copylen != 0)
1753 			ovbcopy(((uint8_t *)ext - copylen), target, copylen);
1754 	}
1755 
1756 	/* Adjust samsg. */
1757 	samsg->sadb_msg_len = (uint16_t)sofar;
1758 
1759 	/* Assume all of the rest is cleared by caller in sadb_pfkey_echo(). */
1760 }
1761 
1762 /*
1763  * AH needs to send an error to PF_KEY.	 Assume mp points to an M_CTL
1764  * followed by an M_DATA with a PF_KEY message in it.  The serial of
1765  * the sending keysock instance is included.
1766  */
1767 void
1768 sadb_pfkey_error(queue_t *pfkey_q, mblk_t *mp, int error, int diagnostic,
1769     uint_t serial)
1770 {
1771 	mblk_t *msg = mp->b_cont;
1772 	sadb_msg_t *samsg;
1773 	keysock_out_t *kso;
1774 
1775 	/*
1776 	 * Enough functions call this to merit a NULL queue check.
1777 	 */
1778 	if (pfkey_q == NULL) {
1779 		freemsg(mp);
1780 		return;
1781 	}
1782 
1783 	ASSERT(msg != NULL);
1784 	ASSERT((mp->b_wptr - mp->b_rptr) == sizeof (ipsec_info_t));
1785 	ASSERT((msg->b_wptr - msg->b_rptr) >= sizeof (sadb_msg_t));
1786 	samsg = (sadb_msg_t *)msg->b_rptr;
1787 	kso = (keysock_out_t *)mp->b_rptr;
1788 
1789 	kso->ks_out_type = KEYSOCK_OUT;
1790 	kso->ks_out_len = sizeof (*kso);
1791 	kso->ks_out_serial = serial;
1792 
1793 	/*
1794 	 * Only send the base message up in the event of an error.
1795 	 * Don't worry about bzero()-ing, because it was probably bogus
1796 	 * anyway.
1797 	 */
1798 	msg->b_wptr = msg->b_rptr + sizeof (*samsg);
1799 	samsg = (sadb_msg_t *)msg->b_rptr;
1800 	samsg->sadb_msg_len = SADB_8TO64(sizeof (*samsg));
1801 	samsg->sadb_msg_errno = (uint8_t)error;
1802 	if (diagnostic != SADB_X_DIAGNOSTIC_PRESET)
1803 		samsg->sadb_x_msg_diagnostic = (uint16_t)diagnostic;
1804 
1805 	putnext(pfkey_q, mp);
1806 }
1807 
1808 /*
1809  * Send a successful return packet back to keysock via the queue in pfkey_q.
1810  *
1811  * Often, an SA is associated with the reply message, it's passed in if needed,
1812  * and NULL if not.  BTW, that ipsa will have its refcnt appropriately held,
1813  * and the caller will release said refcnt.
1814  */
1815 void
1816 sadb_pfkey_echo(queue_t *pfkey_q, mblk_t *mp, sadb_msg_t *samsg,
1817     keysock_in_t *ksi, ipsa_t *ipsa)
1818 {
1819 	keysock_out_t *kso;
1820 	mblk_t *mp1;
1821 	sadb_msg_t *newsamsg;
1822 	uint8_t *oldend;
1823 
1824 	ASSERT((mp->b_cont != NULL) &&
1825 	    ((void *)samsg == (void *)mp->b_cont->b_rptr) &&
1826 	    ((void *)mp->b_rptr == (void *)ksi));
1827 
1828 	switch (samsg->sadb_msg_type) {
1829 	case SADB_ADD:
1830 	case SADB_UPDATE:
1831 	case SADB_FLUSH:
1832 	case SADB_DUMP:
1833 		/*
1834 		 * I have all of the message already.  I just need to strip
1835 		 * out the keying material and echo the message back.
1836 		 *
1837 		 * NOTE: for SADB_DUMP, the function sadb_dump() did the
1838 		 * work.  When DUMP reaches here, it should only be a base
1839 		 * message.
1840 		 */
1841 	justecho:
1842 		ASSERT(samsg->sadb_msg_type != SADB_DUMP ||
1843 		    samsg->sadb_msg_len == SADB_8TO64(sizeof (sadb_msg_t)));
1844 
1845 		if (ksi->ks_in_extv[SADB_EXT_KEY_AUTH] != NULL ||
1846 		    ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT] != NULL) {
1847 			sadb_strip(samsg);
1848 			/* Assume PF_KEY message is contiguous. */
1849 			ASSERT(mp->b_cont->b_cont == NULL);
1850 			oldend = mp->b_cont->b_wptr;
1851 			mp->b_cont->b_wptr = mp->b_cont->b_rptr +
1852 			    SADB_64TO8(samsg->sadb_msg_len);
1853 			bzero(mp->b_cont->b_wptr, oldend - mp->b_cont->b_wptr);
1854 		}
1855 		break;
1856 	case SADB_GET:
1857 		/*
1858 		 * Do a lot of work here, because of the ipsa I just found.
1859 		 * First construct the new PF_KEY message, then abandon
1860 		 * the old one.
1861 		 */
1862 		mp1 = sadb_sa2msg(ipsa, samsg);
1863 		if (mp1 == NULL) {
1864 			sadb_pfkey_error(pfkey_q, mp, ENOMEM,
1865 			    SADB_X_DIAGNOSTIC_NONE, ksi->ks_in_serial);
1866 			return;
1867 		}
1868 		freemsg(mp->b_cont);
1869 		mp->b_cont = mp1;
1870 		break;
1871 	case SADB_DELETE:
1872 		if (ipsa == NULL)
1873 			goto justecho;
1874 		/*
1875 		 * Because listening KMds may require more info, treat
1876 		 * DELETE like a special case of GET.
1877 		 */
1878 		mp1 = sadb_sa2msg(ipsa, samsg);
1879 		if (mp1 == NULL) {
1880 			sadb_pfkey_error(pfkey_q, mp, ENOMEM,
1881 			    SADB_X_DIAGNOSTIC_NONE, ksi->ks_in_serial);
1882 			return;
1883 		}
1884 		newsamsg = (sadb_msg_t *)mp1->b_rptr;
1885 		sadb_strip(newsamsg);
1886 		oldend = mp1->b_wptr;
1887 		mp1->b_wptr = mp1->b_rptr + SADB_64TO8(newsamsg->sadb_msg_len);
1888 		bzero(mp1->b_wptr, oldend - mp1->b_wptr);
1889 		freemsg(mp->b_cont);
1890 		mp->b_cont = mp1;
1891 		break;
1892 	default:
1893 		if (mp != NULL)
1894 			freemsg(mp);
1895 		return;
1896 	}
1897 
1898 	/* ksi is now null and void. */
1899 	kso = (keysock_out_t *)ksi;
1900 	kso->ks_out_type = KEYSOCK_OUT;
1901 	kso->ks_out_len = sizeof (*kso);
1902 	kso->ks_out_serial = ksi->ks_in_serial;
1903 	/* We're ready to send... */
1904 	putnext(pfkey_q, mp);
1905 }
1906 
1907 /*
1908  * Set up a global pfkey_q instance for AH, ESP, or some other consumer.
1909  */
1910 void
1911 sadb_keysock_hello(queue_t **pfkey_qp, queue_t *q, mblk_t *mp,
1912     void (*ager)(void *), void *agerarg, timeout_id_t *top, int satype)
1913 {
1914 	keysock_hello_ack_t *kha;
1915 	queue_t *oldq;
1916 
1917 	ASSERT(OTHERQ(q) != NULL);
1918 
1919 	/*
1920 	 * First, check atomically that I'm the first and only keysock
1921 	 * instance.
1922 	 *
1923 	 * Use OTHERQ(q), because qreply(q, mp) == putnext(OTHERQ(q), mp),
1924 	 * and I want this module to say putnext(*_pfkey_q, mp) for PF_KEY
1925 	 * messages.
1926 	 */
1927 
1928 	oldq = casptr((void **)pfkey_qp, NULL, OTHERQ(q));
1929 	if (oldq != NULL) {
1930 		ASSERT(oldq != q);
1931 		cmn_err(CE_WARN, "Danger!  Multiple keysocks on top of %s.\n",
1932 		    (satype == SADB_SATYPE_ESP)? "ESP" : "AH or other");
1933 		freemsg(mp);
1934 		return;
1935 	}
1936 
1937 	kha = (keysock_hello_ack_t *)mp->b_rptr;
1938 	kha->ks_hello_len = sizeof (keysock_hello_ack_t);
1939 	kha->ks_hello_type = KEYSOCK_HELLO_ACK;
1940 	kha->ks_hello_satype = (uint8_t)satype;
1941 
1942 	/*
1943 	 * If we made it past the casptr, then we have "exclusive" access
1944 	 * to the timeout handle.  Fire it off in 4 seconds, because it
1945 	 * just seems like a good interval.
1946 	 */
1947 	*top = qtimeout(*pfkey_qp, ager, agerarg, drv_usectohz(4000000));
1948 
1949 	putnext(*pfkey_qp, mp);
1950 }
1951 
1952 /*
1953  * Normalize IPv4-mapped IPv6 addresses (and prefixes) as appropriate.
1954  *
1955  * Check addresses themselves for wildcard or multicast.
1956  * Check ire table for local/non-local/broadcast.
1957  */
1958 int
1959 sadb_addrcheck(queue_t *pfkey_q, mblk_t *mp, sadb_ext_t *ext, uint_t serial,
1960     netstack_t *ns)
1961 {
1962 	sadb_address_t *addr = (sadb_address_t *)ext;
1963 	struct sockaddr_in *sin;
1964 	struct sockaddr_in6 *sin6;
1965 	ire_t *ire;
1966 	int diagnostic, type;
1967 	boolean_t normalized = B_FALSE;
1968 
1969 	ASSERT(ext != NULL);
1970 	ASSERT((ext->sadb_ext_type == SADB_EXT_ADDRESS_SRC) ||
1971 	    (ext->sadb_ext_type == SADB_EXT_ADDRESS_DST) ||
1972 	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_SRC) ||
1973 	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_DST) ||
1974 	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_NATT_LOC) ||
1975 	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_NATT_REM));
1976 
1977 	/* Assign both sockaddrs, the compiler will do the right thing. */
1978 	sin = (struct sockaddr_in *)(addr + 1);
1979 	sin6 = (struct sockaddr_in6 *)(addr + 1);
1980 
1981 	if (sin6->sin6_family == AF_INET6) {
1982 		if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
1983 			/*
1984 			 * Convert to an AF_INET sockaddr.  This means the
1985 			 * return messages will have the extra space, but have
1986 			 * AF_INET sockaddrs instead of AF_INET6.
1987 			 *
1988 			 * Yes, RFC 2367 isn't clear on what to do here w.r.t.
1989 			 * mapped addresses, but since AF_INET6 ::ffff:<v4> is
1990 			 * equal to AF_INET <v4>, it shouldnt be a huge
1991 			 * problem.
1992 			 */
1993 			sin->sin_family = AF_INET;
1994 			IN6_V4MAPPED_TO_INADDR(&sin6->sin6_addr,
1995 			    &sin->sin_addr);
1996 			bzero(&sin->sin_zero, sizeof (sin->sin_zero));
1997 			normalized = B_TRUE;
1998 		}
1999 	} else if (sin->sin_family != AF_INET) {
2000 		switch (ext->sadb_ext_type) {
2001 		case SADB_EXT_ADDRESS_SRC:
2002 			diagnostic = SADB_X_DIAGNOSTIC_BAD_SRC_AF;
2003 			break;
2004 		case SADB_EXT_ADDRESS_DST:
2005 			diagnostic = SADB_X_DIAGNOSTIC_BAD_DST_AF;
2006 			break;
2007 		case SADB_X_EXT_ADDRESS_INNER_SRC:
2008 			diagnostic = SADB_X_DIAGNOSTIC_BAD_PROXY_AF;
2009 			break;
2010 		case SADB_X_EXT_ADDRESS_INNER_DST:
2011 			diagnostic = SADB_X_DIAGNOSTIC_BAD_INNER_DST_AF;
2012 			break;
2013 		case SADB_X_EXT_ADDRESS_NATT_LOC:
2014 			diagnostic = SADB_X_DIAGNOSTIC_BAD_NATT_LOC_AF;
2015 			break;
2016 		case SADB_X_EXT_ADDRESS_NATT_REM:
2017 			diagnostic = SADB_X_DIAGNOSTIC_BAD_NATT_REM_AF;
2018 			break;
2019 			/* There is no default, see above ASSERT. */
2020 		}
2021 bail:
2022 		if (pfkey_q != NULL) {
2023 			sadb_pfkey_error(pfkey_q, mp, EINVAL, diagnostic,
2024 			    serial);
2025 		} else {
2026 			/*
2027 			 * Scribble in sadb_msg that we got passed in.
2028 			 * Overload "mp" to be an sadb_msg pointer.
2029 			 */
2030 			sadb_msg_t *samsg = (sadb_msg_t *)mp;
2031 
2032 			samsg->sadb_msg_errno = EINVAL;
2033 			samsg->sadb_x_msg_diagnostic = diagnostic;
2034 		}
2035 		return (KS_IN_ADDR_UNKNOWN);
2036 	}
2037 
2038 	if (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_SRC ||
2039 	    ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_DST) {
2040 		/*
2041 		 * We need only check for prefix issues.
2042 		 */
2043 
2044 		/* Set diagnostic now, in case we need it later. */
2045 		diagnostic =
2046 		    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_SRC) ?
2047 		    SADB_X_DIAGNOSTIC_PREFIX_INNER_SRC :
2048 		    SADB_X_DIAGNOSTIC_PREFIX_INNER_DST;
2049 
2050 		if (normalized)
2051 			addr->sadb_address_prefixlen -= 96;
2052 
2053 		/*
2054 		 * Verify and mask out inner-addresses based on prefix length.
2055 		 */
2056 		if (sin->sin_family == AF_INET) {
2057 			if (addr->sadb_address_prefixlen > 32)
2058 				goto bail;
2059 			sin->sin_addr.s_addr &=
2060 			    ip_plen_to_mask(addr->sadb_address_prefixlen);
2061 		} else {
2062 			in6_addr_t mask;
2063 
2064 			ASSERT(sin->sin_family == AF_INET6);
2065 			/*
2066 			 * ip_plen_to_mask_v6() returns NULL if the value in
2067 			 * question is out of range.
2068 			 */
2069 			if (ip_plen_to_mask_v6(addr->sadb_address_prefixlen,
2070 			    &mask) == NULL)
2071 				goto bail;
2072 			sin6->sin6_addr.s6_addr32[0] &= mask.s6_addr32[0];
2073 			sin6->sin6_addr.s6_addr32[1] &= mask.s6_addr32[1];
2074 			sin6->sin6_addr.s6_addr32[2] &= mask.s6_addr32[2];
2075 			sin6->sin6_addr.s6_addr32[3] &= mask.s6_addr32[3];
2076 		}
2077 
2078 		/* We don't care in these cases. */
2079 		return (KS_IN_ADDR_DONTCARE);
2080 	}
2081 
2082 	if (sin->sin_family == AF_INET6) {
2083 		/* Check the easy ones now. */
2084 		if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
2085 			return (KS_IN_ADDR_MBCAST);
2086 		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
2087 			return (KS_IN_ADDR_UNSPEC);
2088 		/*
2089 		 * At this point, we're a unicast IPv6 address.
2090 		 *
2091 		 * A ctable lookup for local is sufficient here.  If we're
2092 		 * local, return KS_IN_ADDR_ME, otherwise KS_IN_ADDR_NOTME.
2093 		 *
2094 		 * XXX Zones alert -> me/notme decision needs to be tempered
2095 		 * by what zone we're in when we go to zone-aware IPsec.
2096 		 */
2097 		ire = ire_ctable_lookup_v6(&sin6->sin6_addr, NULL,
2098 		    IRE_LOCAL, NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE,
2099 		    ns->netstack_ip);
2100 		if (ire != NULL) {
2101 			/* Hey hey, it's local. */
2102 			IRE_REFRELE(ire);
2103 			return (KS_IN_ADDR_ME);
2104 		}
2105 	} else {
2106 		ASSERT(sin->sin_family == AF_INET);
2107 		if (sin->sin_addr.s_addr == INADDR_ANY)
2108 			return (KS_IN_ADDR_UNSPEC);
2109 		if (CLASSD(sin->sin_addr.s_addr))
2110 			return (KS_IN_ADDR_MBCAST);
2111 		/*
2112 		 * At this point we're a unicast or broadcast IPv4 address.
2113 		 *
2114 		 * Lookup on the ctable for IRE_BROADCAST or IRE_LOCAL.
2115 		 * A NULL return value is NOTME, otherwise, look at the
2116 		 * returned ire for broadcast or not and return accordingly.
2117 		 *
2118 		 * XXX Zones alert -> me/notme decision needs to be tempered
2119 		 * by what zone we're in when we go to zone-aware IPsec.
2120 		 */
2121 		ire = ire_ctable_lookup(sin->sin_addr.s_addr, 0,
2122 		    IRE_LOCAL | IRE_BROADCAST, NULL, ALL_ZONES, NULL,
2123 		    MATCH_IRE_TYPE, ns->netstack_ip);
2124 		if (ire != NULL) {
2125 			/* Check for local or broadcast */
2126 			type = ire->ire_type;
2127 			IRE_REFRELE(ire);
2128 			ASSERT(type == IRE_LOCAL || type == IRE_BROADCAST);
2129 			return ((type == IRE_LOCAL) ? KS_IN_ADDR_ME :
2130 			    KS_IN_ADDR_MBCAST);
2131 		}
2132 	}
2133 
2134 	return (KS_IN_ADDR_NOTME);
2135 }
2136 
2137 /*
2138  * Address normalizations and reality checks for inbound PF_KEY messages.
2139  *
2140  * For the case of src == unspecified AF_INET6, and dst == AF_INET, convert
2141  * the source to AF_INET.  Do the same for the inner sources.
2142  */
2143 boolean_t
2144 sadb_addrfix(keysock_in_t *ksi, queue_t *pfkey_q, mblk_t *mp, netstack_t *ns)
2145 {
2146 	struct sockaddr_in *src, *isrc;
2147 	struct sockaddr_in6 *dst, *idst;
2148 	sadb_address_t *srcext, *dstext;
2149 	uint16_t sport;
2150 	sadb_ext_t **extv = ksi->ks_in_extv;
2151 	int rc;
2152 
2153 	if (extv[SADB_EXT_ADDRESS_SRC] != NULL) {
2154 		rc = sadb_addrcheck(pfkey_q, mp, extv[SADB_EXT_ADDRESS_SRC],
2155 		    ksi->ks_in_serial, ns);
2156 		if (rc == KS_IN_ADDR_UNKNOWN)
2157 			return (B_FALSE);
2158 		if (rc == KS_IN_ADDR_MBCAST) {
2159 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2160 			    SADB_X_DIAGNOSTIC_BAD_SRC, ksi->ks_in_serial);
2161 			return (B_FALSE);
2162 		}
2163 		ksi->ks_in_srctype = rc;
2164 	}
2165 
2166 	if (extv[SADB_EXT_ADDRESS_DST] != NULL) {
2167 		rc = sadb_addrcheck(pfkey_q, mp, extv[SADB_EXT_ADDRESS_DST],
2168 		    ksi->ks_in_serial, ns);
2169 		if (rc == KS_IN_ADDR_UNKNOWN)
2170 			return (B_FALSE);
2171 		if (rc == KS_IN_ADDR_UNSPEC) {
2172 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2173 			    SADB_X_DIAGNOSTIC_BAD_DST, ksi->ks_in_serial);
2174 			return (B_FALSE);
2175 		}
2176 		ksi->ks_in_dsttype = rc;
2177 	}
2178 
2179 	/*
2180 	 * NAT-Traversal addrs are simple enough to not require all of
2181 	 * the checks in sadb_addrcheck().  Just normalize or reject if not
2182 	 * AF_INET.
2183 	 */
2184 	if (extv[SADB_X_EXT_ADDRESS_NATT_LOC] != NULL) {
2185 		rc = sadb_addrcheck(pfkey_q, mp,
2186 		    extv[SADB_X_EXT_ADDRESS_NATT_LOC], ksi->ks_in_serial, ns);
2187 
2188 		/*
2189 		 * Local NAT-T addresses never use an IRE_LOCAL, so it should
2190 		 * always be NOTME, or UNSPEC (to handle both tunnel mode
2191 		 * AND local-port flexibility).
2192 		 */
2193 		if (rc != KS_IN_ADDR_NOTME && rc != KS_IN_ADDR_UNSPEC) {
2194 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2195 			    SADB_X_DIAGNOSTIC_MALFORMED_NATT_LOC,
2196 			    ksi->ks_in_serial);
2197 			return (B_FALSE);
2198 		}
2199 		src = (struct sockaddr_in *)
2200 		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_NATT_LOC]) + 1);
2201 		if (src->sin_family != AF_INET) {
2202 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2203 			    SADB_X_DIAGNOSTIC_BAD_NATT_LOC_AF,
2204 			    ksi->ks_in_serial);
2205 			return (B_FALSE);
2206 		}
2207 	}
2208 
2209 	if (extv[SADB_X_EXT_ADDRESS_NATT_REM] != NULL) {
2210 		rc = sadb_addrcheck(pfkey_q, mp,
2211 		    extv[SADB_X_EXT_ADDRESS_NATT_REM], ksi->ks_in_serial, ns);
2212 
2213 		/*
2214 		 * Remote NAT-T addresses never use an IRE_LOCAL, so it should
2215 		 * always be NOTME, or UNSPEC if it's a tunnel-mode SA.
2216 		 */
2217 		if (rc != KS_IN_ADDR_NOTME &&
2218 		    !(extv[SADB_X_EXT_ADDRESS_INNER_SRC] != NULL &&
2219 		    rc == KS_IN_ADDR_UNSPEC)) {
2220 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2221 			    SADB_X_DIAGNOSTIC_MALFORMED_NATT_REM,
2222 			    ksi->ks_in_serial);
2223 			return (B_FALSE);
2224 		}
2225 		src = (struct sockaddr_in *)
2226 		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_NATT_REM]) + 1);
2227 		if (src->sin_family != AF_INET) {
2228 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2229 			    SADB_X_DIAGNOSTIC_BAD_NATT_REM_AF,
2230 			    ksi->ks_in_serial);
2231 			return (B_FALSE);
2232 		}
2233 	}
2234 
2235 	if (extv[SADB_X_EXT_ADDRESS_INNER_SRC] != NULL) {
2236 		if (extv[SADB_X_EXT_ADDRESS_INNER_DST] == NULL) {
2237 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2238 			    SADB_X_DIAGNOSTIC_MISSING_INNER_DST,
2239 			    ksi->ks_in_serial);
2240 			return (B_FALSE);
2241 		}
2242 
2243 		if (sadb_addrcheck(pfkey_q, mp,
2244 		    extv[SADB_X_EXT_ADDRESS_INNER_DST], ksi->ks_in_serial, ns)
2245 		    == KS_IN_ADDR_UNKNOWN ||
2246 		    sadb_addrcheck(pfkey_q, mp,
2247 		    extv[SADB_X_EXT_ADDRESS_INNER_SRC], ksi->ks_in_serial, ns)
2248 		    == KS_IN_ADDR_UNKNOWN)
2249 			return (B_FALSE);
2250 
2251 		isrc = (struct sockaddr_in *)
2252 		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_SRC]) +
2253 		    1);
2254 		idst = (struct sockaddr_in6 *)
2255 		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_DST]) +
2256 		    1);
2257 		if (isrc->sin_family != idst->sin6_family) {
2258 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2259 			    SADB_X_DIAGNOSTIC_INNER_AF_MISMATCH,
2260 			    ksi->ks_in_serial);
2261 			return (B_FALSE);
2262 		}
2263 	} else if (extv[SADB_X_EXT_ADDRESS_INNER_DST] != NULL) {
2264 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2265 			    SADB_X_DIAGNOSTIC_MISSING_INNER_SRC,
2266 			    ksi->ks_in_serial);
2267 			return (B_FALSE);
2268 	} else {
2269 		isrc = NULL;	/* For inner/outer port check below. */
2270 	}
2271 
2272 	dstext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_DST];
2273 	srcext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_SRC];
2274 
2275 	if (dstext == NULL || srcext == NULL)
2276 		return (B_TRUE);
2277 
2278 	dst = (struct sockaddr_in6 *)(dstext + 1);
2279 	src = (struct sockaddr_in *)(srcext + 1);
2280 
2281 	if (isrc != NULL &&
2282 	    (isrc->sin_port != 0 || idst->sin6_port != 0) &&
2283 	    (src->sin_port != 0 || dst->sin6_port != 0)) {
2284 		/* Can't set inner and outer ports in one SA. */
2285 		sadb_pfkey_error(pfkey_q, mp, EINVAL,
2286 		    SADB_X_DIAGNOSTIC_DUAL_PORT_SETS,
2287 		    ksi->ks_in_serial);
2288 		return (B_FALSE);
2289 	}
2290 
2291 	if (dst->sin6_family == src->sin_family)
2292 		return (B_TRUE);
2293 
2294 	if (srcext->sadb_address_proto != dstext->sadb_address_proto) {
2295 		if (srcext->sadb_address_proto == 0) {
2296 			srcext->sadb_address_proto = dstext->sadb_address_proto;
2297 		} else if (dstext->sadb_address_proto == 0) {
2298 			dstext->sadb_address_proto = srcext->sadb_address_proto;
2299 		} else {
2300 			/* Inequal protocols, neither were 0.  Report error. */
2301 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2302 			    SADB_X_DIAGNOSTIC_PROTO_MISMATCH,
2303 			    ksi->ks_in_serial);
2304 			return (B_FALSE);
2305 		}
2306 	}
2307 
2308 	/*
2309 	 * With the exception of an unspec IPv6 source and an IPv4
2310 	 * destination, address families MUST me matched.
2311 	 */
2312 	if (src->sin_family == AF_INET ||
2313 	    ksi->ks_in_srctype != KS_IN_ADDR_UNSPEC) {
2314 		sadb_pfkey_error(pfkey_q, mp, EINVAL,
2315 		    SADB_X_DIAGNOSTIC_AF_MISMATCH, ksi->ks_in_serial);
2316 		return (B_FALSE);
2317 	}
2318 
2319 	/*
2320 	 * Convert "src" to AF_INET INADDR_ANY.  We rely on sin_port being
2321 	 * in the same place for sockaddr_in and sockaddr_in6.
2322 	 */
2323 	sport = src->sin_port;
2324 	bzero(src, sizeof (*src));
2325 	src->sin_family = AF_INET;
2326 	src->sin_port = sport;
2327 
2328 	return (B_TRUE);
2329 }
2330 
2331 /*
2332  * Set the results in "addrtype", given an IRE as requested by
2333  * sadb_addrcheck().
2334  */
2335 int
2336 sadb_addrset(ire_t *ire)
2337 {
2338 	if ((ire->ire_type & IRE_BROADCAST) ||
2339 	    (ire->ire_ipversion == IPV4_VERSION && CLASSD(ire->ire_addr)) ||
2340 	    (ire->ire_ipversion == IPV6_VERSION &&
2341 	    IN6_IS_ADDR_MULTICAST(&(ire->ire_addr_v6))))
2342 		return (KS_IN_ADDR_MBCAST);
2343 	if (ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK))
2344 		return (KS_IN_ADDR_ME);
2345 	return (KS_IN_ADDR_NOTME);
2346 }
2347 
2348 
2349 /*
2350  * Walker callback function to delete sa's based on src/dst address.
2351  * Assumes that we're called with *head locked, no other locks held;
2352  * Conveniently, and not coincidentally, this is both what sadb_walker
2353  * gives us and also what sadb_unlinkassoc expects.
2354  */
2355 
2356 struct sadb_purge_state
2357 {
2358 	uint32_t *src;
2359 	uint32_t *dst;
2360 	sa_family_t af;
2361 	boolean_t inbnd;
2362 	char *sidstr;
2363 	char *didstr;
2364 	uint16_t sidtype;
2365 	uint16_t didtype;
2366 	uint32_t kmproto;
2367 	mblk_t *mq;
2368 };
2369 
2370 static void
2371 sadb_purge_cb(isaf_t *head, ipsa_t *entry, void *cookie)
2372 {
2373 	struct sadb_purge_state *ps = (struct sadb_purge_state *)cookie;
2374 
2375 	ASSERT(MUTEX_HELD(&head->isaf_lock));
2376 
2377 	mutex_enter(&entry->ipsa_lock);
2378 
2379 	if ((entry->ipsa_state == IPSA_STATE_LARVAL) ||
2380 	    (ps->src != NULL &&
2381 	    !IPSA_ARE_ADDR_EQUAL(entry->ipsa_srcaddr, ps->src, ps->af)) ||
2382 	    (ps->dst != NULL &&
2383 	    !IPSA_ARE_ADDR_EQUAL(entry->ipsa_dstaddr, ps->dst, ps->af)) ||
2384 	    (ps->didstr != NULL && (entry->ipsa_dst_cid != NULL) &&
2385 	    !(ps->didtype == entry->ipsa_dst_cid->ipsid_type &&
2386 	    strcmp(ps->didstr, entry->ipsa_dst_cid->ipsid_cid) == 0)) ||
2387 	    (ps->sidstr != NULL && (entry->ipsa_src_cid != NULL) &&
2388 	    !(ps->sidtype == entry->ipsa_src_cid->ipsid_type &&
2389 	    strcmp(ps->sidstr, entry->ipsa_src_cid->ipsid_cid) == 0)) ||
2390 	    (ps->kmproto <= SADB_X_KMP_MAX && ps->kmproto != entry->ipsa_kmp)) {
2391 		mutex_exit(&entry->ipsa_lock);
2392 		return;
2393 	}
2394 
2395 	entry->ipsa_state = IPSA_STATE_DEAD;
2396 	(void) sadb_torch_assoc(head, entry, ps->inbnd, &ps->mq);
2397 }
2398 
2399 /*
2400  * Common code to purge an SA with a matching src or dst address.
2401  * Don't kill larval SA's in such a purge.
2402  */
2403 int
2404 sadb_purge_sa(mblk_t *mp, keysock_in_t *ksi, sadb_t *sp, queue_t *pfkey_q,
2405     queue_t *ip_q)
2406 {
2407 	sadb_address_t *dstext =
2408 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
2409 	sadb_address_t *srcext =
2410 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
2411 	sadb_ident_t *dstid =
2412 	    (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_DST];
2413 	sadb_ident_t *srcid =
2414 	    (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_SRC];
2415 	sadb_x_kmc_t *kmc =
2416 	    (sadb_x_kmc_t *)ksi->ks_in_extv[SADB_X_EXT_KM_COOKIE];
2417 	struct sockaddr_in *src, *dst;
2418 	struct sockaddr_in6 *src6, *dst6;
2419 	struct sadb_purge_state ps;
2420 
2421 	/*
2422 	 * Don't worry about IPv6 v4-mapped addresses, sadb_addrcheck()
2423 	 * takes care of them.
2424 	 */
2425 
2426 	/* enforced by caller */
2427 	ASSERT((dstext != NULL) || (srcext != NULL));
2428 
2429 	ps.src = NULL;
2430 	ps.dst = NULL;
2431 #ifdef DEBUG
2432 	ps.af = (sa_family_t)-1;
2433 #endif
2434 	ps.mq = NULL;
2435 	ps.sidstr = NULL;
2436 	ps.didstr = NULL;
2437 	ps.kmproto = SADB_X_KMP_MAX + 1;
2438 
2439 	if (dstext != NULL) {
2440 		dst = (struct sockaddr_in *)(dstext + 1);
2441 		ps.af = dst->sin_family;
2442 		if (dst->sin_family == AF_INET6) {
2443 			dst6 = (struct sockaddr_in6 *)dst;
2444 			ps.dst = (uint32_t *)&dst6->sin6_addr;
2445 		} else {
2446 			ps.dst = (uint32_t *)&dst->sin_addr;
2447 		}
2448 	}
2449 
2450 	if (srcext != NULL) {
2451 		src = (struct sockaddr_in *)(srcext + 1);
2452 		ps.af = src->sin_family;
2453 		if (src->sin_family == AF_INET6) {
2454 			src6 = (struct sockaddr_in6 *)(srcext + 1);
2455 			ps.src = (uint32_t *)&src6->sin6_addr;
2456 		} else {
2457 			ps.src = (uint32_t *)&src->sin_addr;
2458 		}
2459 		ASSERT(dstext == NULL || src->sin_family == dst->sin_family);
2460 	}
2461 
2462 	ASSERT(ps.af != (sa_family_t)-1);
2463 
2464 	if (dstid != NULL) {
2465 		/*
2466 		 * NOTE:  May need to copy string in the future
2467 		 * if the inbound keysock message disappears for some strange
2468 		 * reason.
2469 		 */
2470 		ps.didstr = (char *)(dstid + 1);
2471 		ps.didtype = dstid->sadb_ident_type;
2472 	}
2473 
2474 	if (srcid != NULL) {
2475 		/*
2476 		 * NOTE:  May need to copy string in the future
2477 		 * if the inbound keysock message disappears for some strange
2478 		 * reason.
2479 		 */
2480 		ps.sidstr = (char *)(srcid + 1);
2481 		ps.sidtype = srcid->sadb_ident_type;
2482 	}
2483 
2484 	if (kmc != NULL)
2485 		ps.kmproto = kmc->sadb_x_kmc_proto;
2486 
2487 	/*
2488 	 * This is simple, crude, and effective.
2489 	 * Unimplemented optimizations (TBD):
2490 	 * - we can limit how many places we search based on where we
2491 	 * think the SA is filed.
2492 	 * - if we get a dst address, we can hash based on dst addr to find
2493 	 * the correct bucket in the outbound table.
2494 	 */
2495 	ps.inbnd = B_TRUE;
2496 	sadb_walker(sp->sdb_if, sp->sdb_hashsize, sadb_purge_cb, &ps);
2497 	ps.inbnd = B_FALSE;
2498 	sadb_walker(sp->sdb_of, sp->sdb_hashsize, sadb_purge_cb, &ps);
2499 
2500 	if (ps.mq != NULL)
2501 		sadb_drain_torchq(ip_q, ps.mq);
2502 
2503 	ASSERT(mp->b_cont != NULL);
2504 	sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)mp->b_cont->b_rptr, ksi,
2505 	    NULL);
2506 	return (0);
2507 }
2508 
2509 /*
2510  * Common code to delete/get an SA.
2511  */
2512 int
2513 sadb_delget_sa(mblk_t *mp, keysock_in_t *ksi, sadbp_t *spp,
2514     int *diagnostic, queue_t *pfkey_q, boolean_t delete)
2515 {
2516 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
2517 	sadb_address_t *srcext =
2518 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
2519 	sadb_address_t *dstext =
2520 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
2521 	struct sockaddr_in *src, *dst;
2522 	struct sockaddr_in6 *src6, *dst6;
2523 	sadb_t *sp;
2524 	ipsa_t *outbound_target, *inbound_target;
2525 	isaf_t *inbound, *outbound;
2526 	uint32_t *srcaddr, *dstaddr;
2527 	mblk_t *torchq = NULL;
2528 	sa_family_t af;
2529 
2530 	if (dstext == NULL) {
2531 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
2532 		return (EINVAL);
2533 	}
2534 	if (assoc == NULL) {
2535 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
2536 		return (EINVAL);
2537 	}
2538 
2539 	/*
2540 	 * Don't worry about IPv6 v4-mapped addresses, sadb_addrcheck()
2541 	 * takes care of them.
2542 	 */
2543 
2544 	dst = (struct sockaddr_in *)(dstext + 1);
2545 	af = dst->sin_family;
2546 	if (af == AF_INET6) {
2547 		sp = &spp->s_v6;
2548 		dst6 = (struct sockaddr_in6 *)dst;
2549 		dstaddr = (uint32_t *)&dst6->sin6_addr;
2550 		if (srcext != NULL) {
2551 			src6 = (struct sockaddr_in6 *)(srcext + 1);
2552 			srcaddr = (uint32_t *)&src6->sin6_addr;
2553 			ASSERT(src6->sin6_family == AF_INET6);
2554 		} else {
2555 			srcaddr = ALL_ZEROES_PTR;
2556 		}
2557 
2558 		outbound = OUTBOUND_BUCKET_V6(sp, *(uint32_t *)dstaddr);
2559 	} else {
2560 		sp = &spp->s_v4;
2561 		dstaddr = (uint32_t *)&dst->sin_addr;
2562 		if (srcext != NULL) {
2563 			src = (struct sockaddr_in *)(srcext + 1);
2564 			srcaddr = (uint32_t *)&src->sin_addr;
2565 			ASSERT(src->sin_family == AF_INET);
2566 		} else {
2567 			srcaddr = ALL_ZEROES_PTR;
2568 		}
2569 		outbound = OUTBOUND_BUCKET_V4(sp, *(uint32_t *)dstaddr);
2570 	}
2571 
2572 	inbound = INBOUND_BUCKET(sp, assoc->sadb_sa_spi);
2573 
2574 	/* Lock down both buckets. */
2575 	mutex_enter(&outbound->isaf_lock);
2576 	mutex_enter(&inbound->isaf_lock);
2577 
2578 	/* Try outbound first. */
2579 	outbound_target = ipsec_getassocbyspi(outbound, assoc->sadb_sa_spi,
2580 	    srcaddr, dstaddr, af);
2581 
2582 	if (outbound_target == NULL || outbound_target->ipsa_haspeer) {
2583 		inbound_target = ipsec_getassocbyspi(inbound,
2584 		    assoc->sadb_sa_spi, srcaddr, dstaddr, af);
2585 	} else {
2586 		inbound_target = NULL;
2587 	}
2588 
2589 	if (outbound_target == NULL && inbound_target == NULL) {
2590 		mutex_exit(&inbound->isaf_lock);
2591 		mutex_exit(&outbound->isaf_lock);
2592 		return (ESRCH);
2593 	}
2594 
2595 	if (delete) {
2596 		/* At this point, I have one or two SAs to be deleted. */
2597 		if (outbound_target != NULL) {
2598 			mutex_enter(&outbound_target->ipsa_lock);
2599 			outbound_target->ipsa_state = IPSA_STATE_DEAD;
2600 			(void) sadb_torch_assoc(outbound, outbound_target,
2601 			    B_FALSE, &torchq);
2602 		}
2603 
2604 		if (inbound_target != NULL) {
2605 			mutex_enter(&inbound_target->ipsa_lock);
2606 			inbound_target->ipsa_state = IPSA_STATE_DEAD;
2607 			(void) sadb_torch_assoc(inbound, inbound_target,
2608 			    B_TRUE, &torchq);
2609 		}
2610 	}
2611 
2612 	mutex_exit(&inbound->isaf_lock);
2613 	mutex_exit(&outbound->isaf_lock);
2614 
2615 	if (torchq != NULL)
2616 		sadb_drain_torchq(spp->s_ip_q, torchq);
2617 
2618 	/*
2619 	 * Because of the multi-line macro nature of IPSA_REFRELE, keep
2620 	 * them in { }.
2621 	 */
2622 	ASSERT(mp->b_cont != NULL);
2623 	sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)mp->b_cont->b_rptr, ksi,
2624 	    (outbound_target != NULL ? outbound_target : inbound_target));
2625 
2626 	if (outbound_target != NULL) {
2627 		IPSA_REFRELE(outbound_target);
2628 	}
2629 	if (inbound_target != NULL) {
2630 		IPSA_REFRELE(inbound_target);
2631 	}
2632 
2633 	return (0);
2634 }
2635 
2636 /*
2637  * Initialize the mechanism parameters associated with an SA.
2638  * These parameters can be shared by multiple packets, which saves
2639  * us from the overhead of consulting the algorithm table for
2640  * each packet.
2641  */
2642 static void
2643 sadb_init_alginfo(ipsa_t *sa)
2644 {
2645 	ipsec_alginfo_t *alg;
2646 	ipsec_stack_t	*ipss = sa->ipsa_netstack->netstack_ipsec;
2647 
2648 	mutex_enter(&ipss->ipsec_alg_lock);
2649 
2650 	if (sa->ipsa_encrkey != NULL) {
2651 		alg = ipss->ipsec_alglists[IPSEC_ALG_ENCR][sa->ipsa_encr_alg];
2652 		if (alg != NULL && ALG_VALID(alg)) {
2653 			sa->ipsa_emech.cm_type = alg->alg_mech_type;
2654 			sa->ipsa_emech.cm_param = NULL;
2655 			sa->ipsa_emech.cm_param_len = 0;
2656 			sa->ipsa_iv_len = alg->alg_datalen;
2657 		} else
2658 			sa->ipsa_emech.cm_type = CRYPTO_MECHANISM_INVALID;
2659 	}
2660 
2661 	if (sa->ipsa_authkey != NULL) {
2662 		alg = ipss->ipsec_alglists[IPSEC_ALG_AUTH][sa->ipsa_auth_alg];
2663 		if (alg != NULL && ALG_VALID(alg)) {
2664 			sa->ipsa_amech.cm_type = alg->alg_mech_type;
2665 			sa->ipsa_amech.cm_param = (char *)&sa->ipsa_mac_len;
2666 			sa->ipsa_amech.cm_param_len = sizeof (size_t);
2667 			sa->ipsa_mac_len = (size_t)alg->alg_datalen;
2668 		} else
2669 			sa->ipsa_amech.cm_type = CRYPTO_MECHANISM_INVALID;
2670 	}
2671 
2672 	mutex_exit(&ipss->ipsec_alg_lock);
2673 }
2674 
2675 /*
2676  * Perform NAT-traversal cached checksum offset calculations here.
2677  */
2678 static void
2679 sadb_nat_calculations(ipsa_t *newbie, sadb_address_t *natt_loc_ext,
2680     sadb_address_t *natt_rem_ext, uint32_t *src_addr_ptr,
2681     uint32_t *dst_addr_ptr)
2682 {
2683 	struct sockaddr_in *natt_loc, *natt_rem;
2684 	uint32_t *natt_loc_ptr = NULL, *natt_rem_ptr = NULL;
2685 	uint32_t running_sum = 0;
2686 
2687 #define	DOWN_SUM(x) (x) = ((x) & 0xFFFF) +	 ((x) >> 16)
2688 
2689 	if (natt_rem_ext != NULL) {
2690 		uint32_t l_src;
2691 		uint32_t l_rem;
2692 
2693 		natt_rem = (struct sockaddr_in *)(natt_rem_ext + 1);
2694 
2695 		/* Ensured by sadb_addrfix(). */
2696 		ASSERT(natt_rem->sin_family == AF_INET);
2697 
2698 		natt_rem_ptr = (uint32_t *)(&natt_rem->sin_addr);
2699 		newbie->ipsa_remote_nat_port = natt_rem->sin_port;
2700 		l_src = *src_addr_ptr;
2701 		l_rem = *natt_rem_ptr;
2702 
2703 		/* Instead of IPSA_COPY_ADDR(), just copy first 32 bits. */
2704 		newbie->ipsa_natt_addr_rem = *natt_rem_ptr;
2705 
2706 		l_src = ntohl(l_src);
2707 		DOWN_SUM(l_src);
2708 		DOWN_SUM(l_src);
2709 		l_rem = ntohl(l_rem);
2710 		DOWN_SUM(l_rem);
2711 		DOWN_SUM(l_rem);
2712 
2713 		/*
2714 		 * We're 1's complement for checksums, so check for wraparound
2715 		 * here.
2716 		 */
2717 		if (l_rem > l_src)
2718 			l_src--;
2719 
2720 		running_sum += l_src - l_rem;
2721 
2722 		DOWN_SUM(running_sum);
2723 		DOWN_SUM(running_sum);
2724 	}
2725 
2726 	if (natt_loc_ext != NULL) {
2727 		natt_loc = (struct sockaddr_in *)(natt_loc_ext + 1);
2728 
2729 		/* Ensured by sadb_addrfix(). */
2730 		ASSERT(natt_loc->sin_family == AF_INET);
2731 
2732 		natt_loc_ptr = (uint32_t *)(&natt_loc->sin_addr);
2733 		newbie->ipsa_local_nat_port = natt_loc->sin_port;
2734 
2735 		/* Instead of IPSA_COPY_ADDR(), just copy first 32 bits. */
2736 		newbie->ipsa_natt_addr_loc = *natt_loc_ptr;
2737 
2738 		/*
2739 		 * NAT-T port agility means we may have natt_loc_ext, but
2740 		 * only for a local-port change.
2741 		 */
2742 		if (natt_loc->sin_addr.s_addr != INADDR_ANY) {
2743 			uint32_t l_dst = ntohl(*dst_addr_ptr);
2744 			uint32_t l_loc = ntohl(*natt_loc_ptr);
2745 
2746 			DOWN_SUM(l_loc);
2747 			DOWN_SUM(l_loc);
2748 			DOWN_SUM(l_dst);
2749 			DOWN_SUM(l_dst);
2750 
2751 			/*
2752 			 * We're 1's complement for checksums, so check for
2753 			 * wraparound here.
2754 			 */
2755 			if (l_loc > l_dst)
2756 				l_dst--;
2757 
2758 			running_sum += l_dst - l_loc;
2759 			DOWN_SUM(running_sum);
2760 			DOWN_SUM(running_sum);
2761 		}
2762 	}
2763 
2764 	newbie->ipsa_inbound_cksum = running_sum;
2765 #undef DOWN_SUM
2766 }
2767 
2768 /*
2769  * This function is called from consumers that need to insert a fully-grown
2770  * security association into its tables.  This function takes into account that
2771  * SAs can be "inbound", "outbound", or "both".	 The "primary" and "secondary"
2772  * hash bucket parameters are set in order of what the SA will be most of the
2773  * time.  (For example, an SA with an unspecified source, and a multicast
2774  * destination will primarily be an outbound SA.  OTOH, if that destination
2775  * is unicast for this node, then the SA will primarily be inbound.)
2776  *
2777  * It takes a lot of parameters because even if clone is B_FALSE, this needs
2778  * to check both buckets for purposes of collision.
2779  *
2780  * Return 0 upon success.  Return various errnos (ENOMEM, EEXIST) for
2781  * various error conditions.  We may need to set samsg->sadb_x_msg_diagnostic
2782  * with additional diagnostic information because there is at least one EINVAL
2783  * case here.
2784  */
2785 int
2786 sadb_common_add(queue_t *ip_q, queue_t *pfkey_q, mblk_t *mp, sadb_msg_t *samsg,
2787     keysock_in_t *ksi, isaf_t *primary, isaf_t *secondary,
2788     ipsa_t *newbie, boolean_t clone, boolean_t is_inbound, int *diagnostic,
2789     netstack_t *ns)
2790 {
2791 	ipsa_t *newbie_clone = NULL, *scratch;
2792 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
2793 	sadb_address_t *srcext =
2794 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
2795 	sadb_address_t *dstext =
2796 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
2797 	sadb_address_t *isrcext =
2798 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_SRC];
2799 	sadb_address_t *idstext =
2800 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_DST];
2801 	sadb_x_kmc_t *kmcext =
2802 	    (sadb_x_kmc_t *)ksi->ks_in_extv[SADB_X_EXT_KM_COOKIE];
2803 	sadb_key_t *akey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_AUTH];
2804 	sadb_key_t *ekey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT];
2805 #if 0
2806 	/*
2807 	 * XXXMLS - When Trusted Solaris or Multi-Level Secure functionality
2808 	 * comes to ON, examine these if 0'ed fragments.  Look for XXXMLS.
2809 	 */
2810 	sadb_sens_t *sens = (sadb_sens_t *);
2811 #endif
2812 	struct sockaddr_in *src, *dst, *isrc, *idst;
2813 	struct sockaddr_in6 *src6, *dst6, *isrc6, *idst6;
2814 	sadb_lifetime_t *soft =
2815 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_SOFT];
2816 	sadb_lifetime_t *hard =
2817 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_HARD];
2818 	sa_family_t af;
2819 	int error = 0;
2820 	boolean_t isupdate = (newbie != NULL);
2821 	uint32_t *src_addr_ptr, *dst_addr_ptr, *isrc_addr_ptr, *idst_addr_ptr;
2822 	mblk_t *ctl_mp = NULL;
2823 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
2824 
2825 	src = (struct sockaddr_in *)(srcext + 1);
2826 	src6 = (struct sockaddr_in6 *)(srcext + 1);
2827 	dst = (struct sockaddr_in *)(dstext + 1);
2828 	dst6 = (struct sockaddr_in6 *)(dstext + 1);
2829 	if (isrcext != NULL) {
2830 		isrc = (struct sockaddr_in *)(isrcext + 1);
2831 		isrc6 = (struct sockaddr_in6 *)(isrcext + 1);
2832 		ASSERT(idstext != NULL);
2833 		idst = (struct sockaddr_in *)(idstext + 1);
2834 		idst6 = (struct sockaddr_in6 *)(idstext + 1);
2835 	} else {
2836 		isrc = NULL;
2837 		isrc6 = NULL;
2838 	}
2839 
2840 	af = src->sin_family;
2841 
2842 	if (af == AF_INET) {
2843 		src_addr_ptr = (uint32_t *)&src->sin_addr;
2844 		dst_addr_ptr = (uint32_t *)&dst->sin_addr;
2845 	} else {
2846 		ASSERT(af == AF_INET6);
2847 		src_addr_ptr = (uint32_t *)&src6->sin6_addr;
2848 		dst_addr_ptr = (uint32_t *)&dst6->sin6_addr;
2849 	}
2850 
2851 	if (!isupdate) {
2852 		newbie = sadb_makelarvalassoc(assoc->sadb_sa_spi,
2853 		    src_addr_ptr, dst_addr_ptr, af, ns);
2854 		if (newbie == NULL)
2855 			return (ENOMEM);
2856 	}
2857 
2858 	mutex_enter(&newbie->ipsa_lock);
2859 
2860 	if (isrc != NULL) {
2861 		if (isrc->sin_family == AF_INET) {
2862 			if (srcext->sadb_address_proto != IPPROTO_ENCAP) {
2863 				if (srcext->sadb_address_proto != 0) {
2864 					/*
2865 					 * Mismatched outer-packet protocol
2866 					 * and inner-packet address family.
2867 					 */
2868 					mutex_exit(&newbie->ipsa_lock);
2869 					error = EPROTOTYPE;
2870 					goto error;
2871 				} else {
2872 					/* Fill in with explicit protocol. */
2873 					srcext->sadb_address_proto =
2874 					    IPPROTO_ENCAP;
2875 					dstext->sadb_address_proto =
2876 					    IPPROTO_ENCAP;
2877 				}
2878 			}
2879 			isrc_addr_ptr = (uint32_t *)&isrc->sin_addr;
2880 			idst_addr_ptr = (uint32_t *)&idst->sin_addr;
2881 		} else {
2882 			ASSERT(isrc->sin_family == AF_INET6);
2883 			if (srcext->sadb_address_proto != IPPROTO_IPV6) {
2884 				if (srcext->sadb_address_proto != 0) {
2885 					/*
2886 					 * Mismatched outer-packet protocol
2887 					 * and inner-packet address family.
2888 					 */
2889 					mutex_exit(&newbie->ipsa_lock);
2890 					error = EPROTOTYPE;
2891 					goto error;
2892 				} else {
2893 					/* Fill in with explicit protocol. */
2894 					srcext->sadb_address_proto =
2895 					    IPPROTO_IPV6;
2896 					dstext->sadb_address_proto =
2897 					    IPPROTO_IPV6;
2898 				}
2899 			}
2900 			isrc_addr_ptr = (uint32_t *)&isrc6->sin6_addr;
2901 			idst_addr_ptr = (uint32_t *)&idst6->sin6_addr;
2902 		}
2903 		newbie->ipsa_innerfam = isrc->sin_family;
2904 
2905 		IPSA_COPY_ADDR(newbie->ipsa_innersrc, isrc_addr_ptr,
2906 		    newbie->ipsa_innerfam);
2907 		IPSA_COPY_ADDR(newbie->ipsa_innerdst, idst_addr_ptr,
2908 		    newbie->ipsa_innerfam);
2909 		newbie->ipsa_innersrcpfx = isrcext->sadb_address_prefixlen;
2910 		newbie->ipsa_innerdstpfx = idstext->sadb_address_prefixlen;
2911 
2912 		/* Unique value uses inner-ports for Tunnel Mode... */
2913 		newbie->ipsa_unique_id = SA_UNIQUE_ID(isrc->sin_port,
2914 		    idst->sin_port, dstext->sadb_address_proto,
2915 		    idstext->sadb_address_proto);
2916 		newbie->ipsa_unique_mask = SA_UNIQUE_MASK(isrc->sin_port,
2917 		    idst->sin_port, dstext->sadb_address_proto,
2918 		    idstext->sadb_address_proto);
2919 	} else {
2920 		/* ... and outer-ports for Transport Mode. */
2921 		newbie->ipsa_unique_id = SA_UNIQUE_ID(src->sin_port,
2922 		    dst->sin_port, dstext->sadb_address_proto, 0);
2923 		newbie->ipsa_unique_mask = SA_UNIQUE_MASK(src->sin_port,
2924 		    dst->sin_port, dstext->sadb_address_proto, 0);
2925 	}
2926 	if (newbie->ipsa_unique_mask != (uint64_t)0)
2927 		newbie->ipsa_flags |= IPSA_F_UNIQUE;
2928 
2929 
2930 	sadb_nat_calculations(newbie,
2931 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_LOC],
2932 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_REM],
2933 	    src_addr_ptr, dst_addr_ptr);
2934 
2935 	newbie->ipsa_type = samsg->sadb_msg_satype;
2936 	ASSERT(assoc->sadb_sa_state == SADB_SASTATE_MATURE);
2937 	newbie->ipsa_auth_alg = assoc->sadb_sa_auth;
2938 	newbie->ipsa_encr_alg = assoc->sadb_sa_encrypt;
2939 	/*
2940 	 * Use |= because we set unique fields above.  UNIQUE is filtered
2941 	 * out before we reach here so it's not like we're sabotaging anything.
2942 	 * ASSERT we're either 0 or UNIQUE for good measure, though.
2943 	 */
2944 	ASSERT((newbie->ipsa_flags & IPSA_F_UNIQUE) == newbie->ipsa_flags);
2945 	newbie->ipsa_flags |= assoc->sadb_sa_flags;
2946 	if ((newbie->ipsa_flags & SADB_X_SAFLAGS_NATT_LOC &&
2947 	    ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_LOC] == NULL) ||
2948 	    (newbie->ipsa_flags & SADB_X_SAFLAGS_NATT_REM &&
2949 	    ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_REM] == NULL) ||
2950 	    (newbie->ipsa_flags & SADB_X_SAFLAGS_TUNNEL &&
2951 	    ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_SRC] == NULL)) {
2952 		mutex_exit(&newbie->ipsa_lock);
2953 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_SAFLAGS;
2954 		error = EINVAL;
2955 		goto error;
2956 	}
2957 	/*
2958 	 * If unspecified source address, force replay_wsize to 0.
2959 	 * This is because an SA that has multiple sources of secure
2960 	 * traffic cannot enforce a replay counter w/o synchronizing the
2961 	 * senders.
2962 	 */
2963 	if (ksi->ks_in_srctype != KS_IN_ADDR_UNSPEC)
2964 		newbie->ipsa_replay_wsize = assoc->sadb_sa_replay;
2965 	else
2966 		newbie->ipsa_replay_wsize = 0;
2967 
2968 	newbie->ipsa_addtime = gethrestime_sec();
2969 
2970 	if (kmcext != NULL) {
2971 		newbie->ipsa_kmp = kmcext->sadb_x_kmc_proto;
2972 		newbie->ipsa_kmc = kmcext->sadb_x_kmc_cookie;
2973 	}
2974 
2975 	/*
2976 	 * XXX CURRENT lifetime checks MAY BE needed for an UPDATE.
2977 	 * The spec says that one can update current lifetimes, but
2978 	 * that seems impractical, especially in the larval-to-mature
2979 	 * update that this function performs.
2980 	 */
2981 	if (soft != NULL) {
2982 		newbie->ipsa_softaddlt = soft->sadb_lifetime_addtime;
2983 		newbie->ipsa_softuselt = soft->sadb_lifetime_usetime;
2984 		newbie->ipsa_softbyteslt = soft->sadb_lifetime_bytes;
2985 		newbie->ipsa_softalloc = soft->sadb_lifetime_allocations;
2986 		SET_EXPIRE(newbie, softaddlt, softexpiretime);
2987 	}
2988 	if (hard != NULL) {
2989 		newbie->ipsa_hardaddlt = hard->sadb_lifetime_addtime;
2990 		newbie->ipsa_harduselt = hard->sadb_lifetime_usetime;
2991 		newbie->ipsa_hardbyteslt = hard->sadb_lifetime_bytes;
2992 		newbie->ipsa_hardalloc = hard->sadb_lifetime_allocations;
2993 		SET_EXPIRE(newbie, hardaddlt, hardexpiretime);
2994 	}
2995 
2996 	newbie->ipsa_authtmpl = NULL;
2997 	newbie->ipsa_encrtmpl = NULL;
2998 
2999 	if (akey != NULL) {
3000 		newbie->ipsa_authkeybits = akey->sadb_key_bits;
3001 		newbie->ipsa_authkeylen = SADB_1TO8(akey->sadb_key_bits);
3002 		/* In case we have to round up to the next byte... */
3003 		if ((akey->sadb_key_bits & 0x7) != 0)
3004 			newbie->ipsa_authkeylen++;
3005 		newbie->ipsa_authkey = kmem_alloc(newbie->ipsa_authkeylen,
3006 		    KM_NOSLEEP);
3007 		if (newbie->ipsa_authkey == NULL) {
3008 			error = ENOMEM;
3009 			mutex_exit(&newbie->ipsa_lock);
3010 			goto error;
3011 		}
3012 		bcopy(akey + 1, newbie->ipsa_authkey, newbie->ipsa_authkeylen);
3013 		bzero(akey + 1, newbie->ipsa_authkeylen);
3014 
3015 		/*
3016 		 * Pre-initialize the kernel crypto framework key
3017 		 * structure.
3018 		 */
3019 		newbie->ipsa_kcfauthkey.ck_format = CRYPTO_KEY_RAW;
3020 		newbie->ipsa_kcfauthkey.ck_length = newbie->ipsa_authkeybits;
3021 		newbie->ipsa_kcfauthkey.ck_data = newbie->ipsa_authkey;
3022 
3023 		mutex_enter(&ipss->ipsec_alg_lock);
3024 		error = ipsec_create_ctx_tmpl(newbie, IPSEC_ALG_AUTH);
3025 		mutex_exit(&ipss->ipsec_alg_lock);
3026 		if (error != 0) {
3027 			mutex_exit(&newbie->ipsa_lock);
3028 			goto error;
3029 		}
3030 	}
3031 
3032 	if (ekey != NULL) {
3033 		newbie->ipsa_encrkeybits = ekey->sadb_key_bits;
3034 		newbie->ipsa_encrkeylen = SADB_1TO8(ekey->sadb_key_bits);
3035 		/* In case we have to round up to the next byte... */
3036 		if ((ekey->sadb_key_bits & 0x7) != 0)
3037 			newbie->ipsa_encrkeylen++;
3038 		newbie->ipsa_encrkey = kmem_alloc(newbie->ipsa_encrkeylen,
3039 		    KM_NOSLEEP);
3040 		if (newbie->ipsa_encrkey == NULL) {
3041 			error = ENOMEM;
3042 			mutex_exit(&newbie->ipsa_lock);
3043 			goto error;
3044 		}
3045 		bcopy(ekey + 1, newbie->ipsa_encrkey, newbie->ipsa_encrkeylen);
3046 		/* XXX is this safe w.r.t db_ref, etc? */
3047 		bzero(ekey + 1, newbie->ipsa_encrkeylen);
3048 
3049 		/*
3050 		 * Pre-initialize the kernel crypto framework key
3051 		 * structure.
3052 		 */
3053 		newbie->ipsa_kcfencrkey.ck_format = CRYPTO_KEY_RAW;
3054 		newbie->ipsa_kcfencrkey.ck_length = newbie->ipsa_encrkeybits;
3055 		newbie->ipsa_kcfencrkey.ck_data = newbie->ipsa_encrkey;
3056 
3057 		mutex_enter(&ipss->ipsec_alg_lock);
3058 		error = ipsec_create_ctx_tmpl(newbie, IPSEC_ALG_ENCR);
3059 		mutex_exit(&ipss->ipsec_alg_lock);
3060 		if (error != 0) {
3061 			mutex_exit(&newbie->ipsa_lock);
3062 			goto error;
3063 		}
3064 	}
3065 
3066 	sadb_init_alginfo(newbie);
3067 
3068 	/*
3069 	 * Ptrs to processing functions.
3070 	 */
3071 	if (newbie->ipsa_type == SADB_SATYPE_ESP)
3072 		ipsecesp_init_funcs(newbie);
3073 	else
3074 		ipsecah_init_funcs(newbie);
3075 	ASSERT(newbie->ipsa_output_func != NULL &&
3076 	    newbie->ipsa_input_func != NULL);
3077 
3078 	/*
3079 	 * Certificate ID stuff.
3080 	 */
3081 	if (ksi->ks_in_extv[SADB_EXT_IDENTITY_SRC] != NULL) {
3082 		sadb_ident_t *id =
3083 		    (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_SRC];
3084 
3085 		/*
3086 		 * Can assume strlen() will return okay because ext_check() in
3087 		 * keysock.c prepares the string for us.
3088 		 */
3089 		newbie->ipsa_src_cid = ipsid_lookup(id->sadb_ident_type,
3090 		    (char *)(id+1), ns);
3091 		if (newbie->ipsa_src_cid == NULL) {
3092 			error = ENOMEM;
3093 			mutex_exit(&newbie->ipsa_lock);
3094 			goto error;
3095 		}
3096 	}
3097 
3098 	if (ksi->ks_in_extv[SADB_EXT_IDENTITY_DST] != NULL) {
3099 		sadb_ident_t *id =
3100 		    (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_DST];
3101 
3102 		/*
3103 		 * Can assume strlen() will return okay because ext_check() in
3104 		 * keysock.c prepares the string for us.
3105 		 */
3106 		newbie->ipsa_dst_cid = ipsid_lookup(id->sadb_ident_type,
3107 		    (char *)(id+1), ns);
3108 		if (newbie->ipsa_dst_cid == NULL) {
3109 			error = ENOMEM;
3110 			mutex_exit(&newbie->ipsa_lock);
3111 			goto error;
3112 		}
3113 	}
3114 
3115 #if 0
3116 	/* XXXMLS  SENSITIVITY handling code. */
3117 	if (sens != NULL) {
3118 		int i;
3119 		uint64_t *bitmap = (uint64_t *)(sens + 1);
3120 
3121 		newbie->ipsa_dpd = sens->sadb_sens_dpd;
3122 		newbie->ipsa_senslevel = sens->sadb_sens_sens_level;
3123 		newbie->ipsa_integlevel = sens->sadb_sens_integ_level;
3124 		newbie->ipsa_senslen = SADB_64TO8(sens->sadb_sens_sens_len);
3125 		newbie->ipsa_integlen = SADB_64TO8(sens->sadb_sens_integ_len);
3126 		newbie->ipsa_integ = kmem_alloc(newbie->ipsa_integlen,
3127 		    KM_NOSLEEP);
3128 		if (newbie->ipsa_integ == NULL) {
3129 			error = ENOMEM;
3130 			mutex_exit(&newbie->ipsa_lock);
3131 			goto error;
3132 		}
3133 		newbie->ipsa_sens = kmem_alloc(newbie->ipsa_senslen,
3134 		    KM_NOSLEEP);
3135 		if (newbie->ipsa_sens == NULL) {
3136 			error = ENOMEM;
3137 			mutex_exit(&newbie->ipsa_lock);
3138 			goto error;
3139 		}
3140 		for (i = 0; i < sens->sadb_sens_sens_len; i++) {
3141 			newbie->ipsa_sens[i] = *bitmap;
3142 			bitmap++;
3143 		}
3144 		for (i = 0; i < sens->sadb_sens_integ_len; i++) {
3145 			newbie->ipsa_integ[i] = *bitmap;
3146 			bitmap++;
3147 		}
3148 	}
3149 
3150 #endif
3151 
3152 	/* now that the SA has been updated, set its new state */
3153 	newbie->ipsa_state = assoc->sadb_sa_state;
3154 
3155 	/*
3156 	 * The less locks I hold when doing an insertion and possible cloning,
3157 	 * the better!
3158 	 */
3159 	mutex_exit(&newbie->ipsa_lock);
3160 
3161 	if (clone) {
3162 		newbie_clone = sadb_cloneassoc(newbie);
3163 
3164 		if (newbie_clone == NULL) {
3165 			error = ENOMEM;
3166 			goto error;
3167 		}
3168 		newbie->ipsa_haspeer = B_TRUE;
3169 		newbie_clone->ipsa_haspeer = B_TRUE;
3170 	}
3171 
3172 	/*
3173 	 * Enter the bucket locks.  The order of entry is outbound,
3174 	 * inbound.  We map "primary" and "secondary" into outbound and inbound
3175 	 * based on the destination address type.  If the destination address
3176 	 * type is for a node that isn't mine (or potentially mine), the
3177 	 * "primary" bucket is the outbound one.
3178 	 */
3179 	if (ksi->ks_in_dsttype == KS_IN_ADDR_NOTME) {
3180 		/* primary == outbound */
3181 		mutex_enter(&primary->isaf_lock);
3182 		mutex_enter(&secondary->isaf_lock);
3183 	} else {
3184 		/* primary == inbound */
3185 		mutex_enter(&secondary->isaf_lock);
3186 		mutex_enter(&primary->isaf_lock);
3187 	}
3188 
3189 	IPSECHW_DEBUG(IPSECHW_SADB, ("sadb_common_add: spi = 0x%x\n",
3190 	    newbie->ipsa_spi));
3191 
3192 	/*
3193 	 * sadb_insertassoc() doesn't increment the reference
3194 	 * count.  We therefore have to increment the
3195 	 * reference count one more time to reflect the
3196 	 * pointers of the table that reference this SA.
3197 	 */
3198 	IPSA_REFHOLD(newbie);
3199 
3200 	if (isupdate) {
3201 		/*
3202 		 * Unlink from larval holding cell in the "inbound" fanout.
3203 		 */
3204 		ASSERT(newbie->ipsa_linklock == &primary->isaf_lock ||
3205 		    newbie->ipsa_linklock == &secondary->isaf_lock);
3206 		sadb_unlinkassoc(newbie);
3207 	}
3208 
3209 	mutex_enter(&newbie->ipsa_lock);
3210 	error = sadb_insertassoc(newbie, primary);
3211 	if (error == 0) {
3212 		ctl_mp = sadb_fmt_sa_req(DL_CO_SET, newbie->ipsa_type, newbie,
3213 		    is_inbound);
3214 	}
3215 	mutex_exit(&newbie->ipsa_lock);
3216 
3217 	if (error != 0) {
3218 		/*
3219 		 * Since sadb_insertassoc() failed, we must decrement the
3220 		 * refcount again so the cleanup code will actually free
3221 		 * the offending SA.
3222 		 */
3223 		IPSA_REFRELE(newbie);
3224 		goto error_unlock;
3225 	}
3226 
3227 	if (newbie_clone != NULL) {
3228 		mutex_enter(&newbie_clone->ipsa_lock);
3229 		error = sadb_insertassoc(newbie_clone, secondary);
3230 		mutex_exit(&newbie_clone->ipsa_lock);
3231 		if (error != 0) {
3232 			/* Collision in secondary table. */
3233 			sadb_unlinkassoc(newbie);  /* This does REFRELE. */
3234 			goto error_unlock;
3235 		}
3236 		IPSA_REFHOLD(newbie_clone);
3237 	} else {
3238 		ASSERT(primary != secondary);
3239 		scratch = ipsec_getassocbyspi(secondary, newbie->ipsa_spi,
3240 		    ALL_ZEROES_PTR, newbie->ipsa_dstaddr, af);
3241 		if (scratch != NULL) {
3242 			/* Collision in secondary table. */
3243 			sadb_unlinkassoc(newbie);  /* This does REFRELE. */
3244 			/* Set the error, since ipsec_getassocbyspi() can't. */
3245 			error = EEXIST;
3246 			goto error_unlock;
3247 		}
3248 	}
3249 
3250 	/* OKAY!  So let's do some reality check assertions. */
3251 
3252 	ASSERT(!MUTEX_HELD(&newbie->ipsa_lock));
3253 	ASSERT(newbie_clone == NULL || (!MUTEX_HELD(&newbie_clone->ipsa_lock)));
3254 	/*
3255 	 * If hardware acceleration could happen, send it.
3256 	 */
3257 	if (ctl_mp != NULL) {
3258 		putnext(ip_q, ctl_mp);
3259 		ctl_mp = NULL;
3260 	}
3261 
3262 error_unlock:
3263 
3264 	/*
3265 	 * We can exit the locks in any order.	Only entrance needs to
3266 	 * follow any protocol.
3267 	 */
3268 	mutex_exit(&secondary->isaf_lock);
3269 	mutex_exit(&primary->isaf_lock);
3270 
3271 	/* Common error point for this routine. */
3272 error:
3273 	if (newbie != NULL) {
3274 		IPSA_REFRELE(newbie);
3275 	}
3276 	if (newbie_clone != NULL) {
3277 		IPSA_REFRELE(newbie_clone);
3278 	}
3279 	if (ctl_mp != NULL)
3280 		freemsg(ctl_mp);
3281 
3282 	if (error == 0) {
3283 		/*
3284 		 * Construct favorable PF_KEY return message and send to
3285 		 * keysock.  (Q:  Do I need to pass "newbie"?  If I do,
3286 		 * make sure to REFHOLD, call, then REFRELE.)
3287 		 */
3288 		sadb_pfkey_echo(pfkey_q, mp, samsg, ksi, NULL);
3289 	}
3290 
3291 	return (error);
3292 }
3293 
3294 /*
3295  * Set the time of first use for a security association.  Update any
3296  * expiration times as a result.
3297  */
3298 void
3299 sadb_set_usetime(ipsa_t *assoc)
3300 {
3301 	time_t snapshot = gethrestime_sec();
3302 
3303 	mutex_enter(&assoc->ipsa_lock);
3304 	assoc->ipsa_lastuse = snapshot;
3305 	/*
3306 	 * Caller does check usetime before calling me usually, and
3307 	 * double-checking is better than a mutex_enter/exit hit.
3308 	 */
3309 	if (assoc->ipsa_usetime == 0) {
3310 		/*
3311 		 * This is redundant for outbound SA's, as
3312 		 * ipsec_getassocbyconn() sets the IPSA_F_USED flag already.
3313 		 * Inbound SAs, however, have no such protection.
3314 		 */
3315 		assoc->ipsa_flags |= IPSA_F_USED;
3316 		assoc->ipsa_usetime = snapshot;
3317 
3318 		/*
3319 		 * After setting the use time, see if we have a use lifetime
3320 		 * that would cause the actual SA expiration time to shorten.
3321 		 */
3322 		UPDATE_EXPIRE(assoc, softuselt, softexpiretime);
3323 		UPDATE_EXPIRE(assoc, harduselt, hardexpiretime);
3324 	}
3325 	mutex_exit(&assoc->ipsa_lock);
3326 }
3327 
3328 /*
3329  * Send up a PF_KEY expire message for this association.
3330  */
3331 static void
3332 sadb_expire_assoc(queue_t *pfkey_q, ipsa_t *assoc)
3333 {
3334 	mblk_t *mp, *mp1;
3335 	int alloclen, af;
3336 	sadb_msg_t *samsg;
3337 	sadb_lifetime_t *current, *expire;
3338 	sadb_sa_t *saext;
3339 	uint8_t *end;
3340 	boolean_t tunnel_mode;
3341 
3342 	ASSERT(MUTEX_HELD(&assoc->ipsa_lock));
3343 
3344 	/* Don't bother sending if there's no queue. */
3345 	if (pfkey_q == NULL)
3346 		return;
3347 
3348 	mp = sadb_keysock_out(0);
3349 	if (mp == NULL) {
3350 		/* cmn_err(CE_WARN, */
3351 		/*	"sadb_expire_assoc: Can't allocate KEYSOCK_OUT.\n"); */
3352 		return;
3353 	}
3354 
3355 	alloclen = sizeof (*samsg) + sizeof (*current) + sizeof (*expire) +
3356 	    2 * sizeof (sadb_address_t) + sizeof (*saext);
3357 
3358 	af = assoc->ipsa_addrfam;
3359 	switch (af) {
3360 	case AF_INET:
3361 		alloclen += 2 * sizeof (struct sockaddr_in);
3362 		break;
3363 	case AF_INET6:
3364 		alloclen += 2 * sizeof (struct sockaddr_in6);
3365 		break;
3366 	default:
3367 		/* Won't happen unless there's a kernel bug. */
3368 		freeb(mp);
3369 		cmn_err(CE_WARN,
3370 		    "sadb_expire_assoc: Unknown address length.\n");
3371 		return;
3372 	}
3373 
3374 	tunnel_mode = (assoc->ipsa_flags & IPSA_F_TUNNEL);
3375 	if (tunnel_mode) {
3376 		alloclen += 2 * sizeof (sadb_address_t);
3377 		switch (assoc->ipsa_innerfam) {
3378 		case AF_INET:
3379 			alloclen += 2 * sizeof (struct sockaddr_in);
3380 			break;
3381 		case AF_INET6:
3382 			alloclen += 2 * sizeof (struct sockaddr_in6);
3383 			break;
3384 		default:
3385 			/* Won't happen unless there's a kernel bug. */
3386 			freeb(mp);
3387 			cmn_err(CE_WARN, "sadb_expire_assoc: "
3388 			    "Unknown inner address length.\n");
3389 			return;
3390 		}
3391 	}
3392 
3393 	mp->b_cont = allocb(alloclen, BPRI_HI);
3394 	if (mp->b_cont == NULL) {
3395 		freeb(mp);
3396 		/* cmn_err(CE_WARN, */
3397 		/*	"sadb_expire_assoc: Can't allocate message.\n"); */
3398 		return;
3399 	}
3400 
3401 	mp1 = mp;
3402 	mp = mp->b_cont;
3403 	end = mp->b_wptr + alloclen;
3404 
3405 	samsg = (sadb_msg_t *)mp->b_wptr;
3406 	mp->b_wptr += sizeof (*samsg);
3407 	samsg->sadb_msg_version = PF_KEY_V2;
3408 	samsg->sadb_msg_type = SADB_EXPIRE;
3409 	samsg->sadb_msg_errno = 0;
3410 	samsg->sadb_msg_satype = assoc->ipsa_type;
3411 	samsg->sadb_msg_len = SADB_8TO64(alloclen);
3412 	samsg->sadb_msg_reserved = 0;
3413 	samsg->sadb_msg_seq = 0;
3414 	samsg->sadb_msg_pid = 0;
3415 
3416 	saext = (sadb_sa_t *)mp->b_wptr;
3417 	mp->b_wptr += sizeof (*saext);
3418 	saext->sadb_sa_len = SADB_8TO64(sizeof (*saext));
3419 	saext->sadb_sa_exttype = SADB_EXT_SA;
3420 	saext->sadb_sa_spi = assoc->ipsa_spi;
3421 	saext->sadb_sa_replay = assoc->ipsa_replay_wsize;
3422 	saext->sadb_sa_state = assoc->ipsa_state;
3423 	saext->sadb_sa_auth = assoc->ipsa_auth_alg;
3424 	saext->sadb_sa_encrypt = assoc->ipsa_encr_alg;
3425 	saext->sadb_sa_flags = assoc->ipsa_flags;
3426 
3427 	current = (sadb_lifetime_t *)mp->b_wptr;
3428 	mp->b_wptr += sizeof (sadb_lifetime_t);
3429 	current->sadb_lifetime_len = SADB_8TO64(sizeof (*current));
3430 	current->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT;
3431 	/* We do not support the concept. */
3432 	current->sadb_lifetime_allocations = 0;
3433 	current->sadb_lifetime_bytes = assoc->ipsa_bytes;
3434 	current->sadb_lifetime_addtime = assoc->ipsa_addtime;
3435 	current->sadb_lifetime_usetime = assoc->ipsa_usetime;
3436 
3437 	expire = (sadb_lifetime_t *)mp->b_wptr;
3438 	mp->b_wptr += sizeof (*expire);
3439 	expire->sadb_lifetime_len = SADB_8TO64(sizeof (*expire));
3440 
3441 	if (assoc->ipsa_state == IPSA_STATE_DEAD) {
3442 		expire->sadb_lifetime_exttype = SADB_EXT_LIFETIME_HARD;
3443 		expire->sadb_lifetime_allocations = assoc->ipsa_hardalloc;
3444 		expire->sadb_lifetime_bytes = assoc->ipsa_hardbyteslt;
3445 		expire->sadb_lifetime_addtime = assoc->ipsa_hardaddlt;
3446 		expire->sadb_lifetime_usetime = assoc->ipsa_harduselt;
3447 	} else {
3448 		ASSERT(assoc->ipsa_state == IPSA_STATE_DYING);
3449 		expire->sadb_lifetime_exttype = SADB_EXT_LIFETIME_SOFT;
3450 		expire->sadb_lifetime_allocations = assoc->ipsa_softalloc;
3451 		expire->sadb_lifetime_bytes = assoc->ipsa_softbyteslt;
3452 		expire->sadb_lifetime_addtime = assoc->ipsa_softaddlt;
3453 		expire->sadb_lifetime_usetime = assoc->ipsa_softuselt;
3454 	}
3455 
3456 	mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end, SADB_EXT_ADDRESS_SRC,
3457 	    af, assoc->ipsa_srcaddr, tunnel_mode ? 0 : SA_SRCPORT(assoc),
3458 	    SA_PROTO(assoc), 0);
3459 	ASSERT(mp->b_wptr != NULL);
3460 
3461 	mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end, SADB_EXT_ADDRESS_DST,
3462 	    af, assoc->ipsa_dstaddr, tunnel_mode ? 0 : SA_DSTPORT(assoc),
3463 	    SA_PROTO(assoc), 0);
3464 	ASSERT(mp->b_wptr != NULL);
3465 
3466 	if (tunnel_mode) {
3467 		mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end,
3468 		    SADB_X_EXT_ADDRESS_INNER_SRC, assoc->ipsa_innerfam,
3469 		    assoc->ipsa_innersrc, SA_SRCPORT(assoc), SA_IPROTO(assoc),
3470 		    assoc->ipsa_innersrcpfx);
3471 		ASSERT(mp->b_wptr != NULL);
3472 		mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end,
3473 		    SADB_X_EXT_ADDRESS_INNER_DST, assoc->ipsa_innerfam,
3474 		    assoc->ipsa_innerdst, SA_DSTPORT(assoc), SA_IPROTO(assoc),
3475 		    assoc->ipsa_innerdstpfx);
3476 		ASSERT(mp->b_wptr != NULL);
3477 	}
3478 
3479 	/* Can just putnext, we're ready to go! */
3480 	putnext(pfkey_q, mp1);
3481 }
3482 
3483 /*
3484  * "Age" the SA with the number of bytes that was used to protect traffic.
3485  * Send an SADB_EXPIRE message if appropriate.	Return B_TRUE if there was
3486  * enough "charge" left in the SA to protect the data.	Return B_FALSE
3487  * otherwise.  (If B_FALSE is returned, the association either was, or became
3488  * DEAD.)
3489  */
3490 boolean_t
3491 sadb_age_bytes(queue_t *pfkey_q, ipsa_t *assoc, uint64_t bytes,
3492     boolean_t sendmsg)
3493 {
3494 	boolean_t rc = B_TRUE;
3495 	uint64_t newtotal;
3496 
3497 	mutex_enter(&assoc->ipsa_lock);
3498 	newtotal = assoc->ipsa_bytes + bytes;
3499 	if (assoc->ipsa_hardbyteslt != 0 &&
3500 	    newtotal >= assoc->ipsa_hardbyteslt) {
3501 		if (assoc->ipsa_state < IPSA_STATE_DEAD) {
3502 			/*
3503 			 * Send EXPIRE message to PF_KEY.  May wish to pawn
3504 			 * this off on another non-interrupt thread.  Also
3505 			 * unlink this SA immediately.
3506 			 */
3507 			assoc->ipsa_state = IPSA_STATE_DEAD;
3508 			if (sendmsg)
3509 				sadb_expire_assoc(pfkey_q, assoc);
3510 			/*
3511 			 * Set non-zero expiration time so sadb_age_assoc()
3512 			 * will work when reaping.
3513 			 */
3514 			assoc->ipsa_hardexpiretime = (time_t)1;
3515 		} /* Else someone beat me to it! */
3516 		rc = B_FALSE;
3517 	} else if (assoc->ipsa_softbyteslt != 0 &&
3518 	    (newtotal >= assoc->ipsa_softbyteslt)) {
3519 		if (assoc->ipsa_state < IPSA_STATE_DYING) {
3520 			/*
3521 			 * Send EXPIRE message to PF_KEY.  May wish to pawn
3522 			 * this off on another non-interrupt thread.
3523 			 */
3524 			assoc->ipsa_state = IPSA_STATE_DYING;
3525 			assoc->ipsa_bytes = newtotal;
3526 			if (sendmsg)
3527 				sadb_expire_assoc(pfkey_q, assoc);
3528 		} /* Else someone beat me to it! */
3529 	}
3530 	if (rc == B_TRUE)
3531 		assoc->ipsa_bytes = newtotal;
3532 	mutex_exit(&assoc->ipsa_lock);
3533 	return (rc);
3534 }
3535 
3536 /*
3537  * Push one or more DL_CO_DELETE messages queued up by
3538  * sadb_torch_assoc down to the underlying driver now that it's a
3539  * convenient time for it (i.e., ipsa bucket locks not held).
3540  */
3541 static void
3542 sadb_drain_torchq(queue_t *q, mblk_t *mp)
3543 {
3544 	while (mp != NULL) {
3545 		mblk_t *next = mp->b_next;
3546 		mp->b_next = NULL;
3547 		if (q != NULL)
3548 			putnext(q, mp);
3549 		else
3550 			freemsg(mp);
3551 		mp = next;
3552 	}
3553 }
3554 
3555 /*
3556  * "Torch" an individual SA.  Returns NULL, so it can be tail-called from
3557  *     sadb_age_assoc().
3558  *
3559  * If SA is hardware-accelerated, and we can't allocate the mblk
3560  * containing the DL_CO_DELETE, just return; it will remain in the
3561  * table and be swept up by sadb_ager() in a subsequent pass.
3562  */
3563 static ipsa_t *
3564 sadb_torch_assoc(isaf_t *head, ipsa_t *sa, boolean_t inbnd, mblk_t **mq)
3565 {
3566 	mblk_t *mp;
3567 
3568 	ASSERT(MUTEX_HELD(&head->isaf_lock));
3569 	ASSERT(MUTEX_HELD(&sa->ipsa_lock));
3570 	ASSERT(sa->ipsa_state == IPSA_STATE_DEAD);
3571 
3572 	/*
3573 	 * Force cached SAs to be revalidated..
3574 	 */
3575 	head->isaf_gen++;
3576 
3577 	if (sa->ipsa_flags & IPSA_F_HW) {
3578 		mp = sadb_fmt_sa_req(DL_CO_DELETE, sa->ipsa_type, sa, inbnd);
3579 		if (mp == NULL) {
3580 			mutex_exit(&sa->ipsa_lock);
3581 			return (NULL);
3582 		}
3583 		mp->b_next = *mq;
3584 		*mq = mp;
3585 	}
3586 	mutex_exit(&sa->ipsa_lock);
3587 	sadb_unlinkassoc(sa);
3588 
3589 	return (NULL);
3590 }
3591 
3592 /*
3593  * Do various SA-is-idle activities depending on delta (the number of idle
3594  * seconds on the SA) and/or other properties of the SA.
3595  *
3596  * Return B_TRUE if I've sent a packet, because I have to drop the
3597  * association's mutex before sending a packet out the wire.
3598  */
3599 /* ARGSUSED */
3600 static boolean_t
3601 sadb_idle_activities(ipsa_t *assoc, time_t delta, boolean_t inbound)
3602 {
3603 	ipsecesp_stack_t *espstack = assoc->ipsa_netstack->netstack_ipsecesp;
3604 	int nat_t_interval = espstack->ipsecesp_nat_keepalive_interval;
3605 
3606 	ASSERT(MUTEX_HELD(&assoc->ipsa_lock));
3607 
3608 	if (!inbound && (assoc->ipsa_flags & IPSA_F_NATT_LOC) &&
3609 	    delta >= nat_t_interval &&
3610 	    gethrestime_sec() - assoc->ipsa_last_nat_t_ka >= nat_t_interval) {
3611 		ASSERT(assoc->ipsa_type == SADB_SATYPE_ESP);
3612 		assoc->ipsa_last_nat_t_ka = gethrestime_sec();
3613 		mutex_exit(&assoc->ipsa_lock);
3614 		ipsecesp_send_keepalive(assoc);
3615 		return (B_TRUE);
3616 	}
3617 	return (B_FALSE);
3618 }
3619 
3620 /*
3621  * Return "assoc" iff haspeer is true and I send an expire.  This allows
3622  * the consumers' aging functions to tidy up an expired SA's peer.
3623  */
3624 static ipsa_t *
3625 sadb_age_assoc(isaf_t *head, queue_t *pfkey_q, ipsa_t *assoc,
3626     time_t current, int reap_delay, boolean_t inbound, mblk_t **mq)
3627 {
3628 	ipsa_t *retval = NULL;
3629 	boolean_t dropped_mutex = B_FALSE;
3630 
3631 	ASSERT(MUTEX_HELD(&head->isaf_lock));
3632 
3633 	mutex_enter(&assoc->ipsa_lock);
3634 
3635 	if ((assoc->ipsa_state == IPSA_STATE_LARVAL) &&
3636 	    (assoc->ipsa_hardexpiretime <= current)) {
3637 		assoc->ipsa_state = IPSA_STATE_DEAD;
3638 		return (sadb_torch_assoc(head, assoc, inbound, mq));
3639 	}
3640 
3641 	/*
3642 	 * Check lifetimes.  Fortunately, SA setup is done
3643 	 * such that there are only two times to look at,
3644 	 * softexpiretime, and hardexpiretime.
3645 	 *
3646 	 * Check hard first.
3647 	 */
3648 
3649 	if (assoc->ipsa_hardexpiretime != 0 &&
3650 	    assoc->ipsa_hardexpiretime <= current) {
3651 		if (assoc->ipsa_state == IPSA_STATE_DEAD)
3652 			return (sadb_torch_assoc(head, assoc, inbound, mq));
3653 
3654 		/*
3655 		 * Send SADB_EXPIRE with hard lifetime, delay for unlinking.
3656 		 */
3657 		assoc->ipsa_state = IPSA_STATE_DEAD;
3658 		if (assoc->ipsa_haspeer) {
3659 			/*
3660 			 * If I return assoc, I have to bump up its
3661 			 * reference count to keep with the ipsa_t reference
3662 			 * count semantics.
3663 			 */
3664 			IPSA_REFHOLD(assoc);
3665 			retval = assoc;
3666 		}
3667 		sadb_expire_assoc(pfkey_q, assoc);
3668 		assoc->ipsa_hardexpiretime = current + reap_delay;
3669 	} else if (assoc->ipsa_softexpiretime != 0 &&
3670 	    assoc->ipsa_softexpiretime <= current &&
3671 	    assoc->ipsa_state < IPSA_STATE_DYING) {
3672 		/*
3673 		 * Send EXPIRE message to PF_KEY.  May wish to pawn
3674 		 * this off on another non-interrupt thread.
3675 		 */
3676 		assoc->ipsa_state = IPSA_STATE_DYING;
3677 		if (assoc->ipsa_haspeer) {
3678 			/*
3679 			 * If I return assoc, I have to bump up its
3680 			 * reference count to keep with the ipsa_t reference
3681 			 * count semantics.
3682 			 */
3683 			IPSA_REFHOLD(assoc);
3684 			retval = assoc;
3685 		}
3686 		sadb_expire_assoc(pfkey_q, assoc);
3687 	} else {
3688 		/* Check idle time activities. */
3689 		dropped_mutex = sadb_idle_activities(assoc,
3690 		    current - assoc->ipsa_lastuse, inbound);
3691 	}
3692 
3693 	if (!dropped_mutex)
3694 		mutex_exit(&assoc->ipsa_lock);
3695 	return (retval);
3696 }
3697 
3698 /*
3699  * Called by a consumer protocol to do ther dirty work of reaping dead
3700  * Security Associations.
3701  */
3702 void
3703 sadb_ager(sadb_t *sp, queue_t *pfkey_q, queue_t *ip_q, int reap_delay,
3704     netstack_t *ns)
3705 {
3706 	int i;
3707 	isaf_t *bucket;
3708 	ipsa_t *assoc, *spare;
3709 	iacqf_t *acqlist;
3710 	ipsacq_t *acqrec, *spareacq;
3711 	struct templist {
3712 		ipsa_t *ipsa;
3713 		struct templist *next;
3714 	} *haspeerlist = NULL, *newbie;
3715 	/* Snapshot current time now. */
3716 	time_t current = gethrestime_sec();
3717 	int outhash;
3718 	mblk_t *mq = NULL;
3719 
3720 	/*
3721 	 * Do my dirty work.  This includes aging real entries, aging
3722 	 * larvals, and aging outstanding ACQUIREs.
3723 	 *
3724 	 * I hope I don't tie up resources for too long.
3725 	 */
3726 
3727 	/* Age acquires. */
3728 
3729 	for (i = 0; i < sp->sdb_hashsize; i++) {
3730 		acqlist = &sp->sdb_acq[i];
3731 		mutex_enter(&acqlist->iacqf_lock);
3732 		for (acqrec = acqlist->iacqf_ipsacq; acqrec != NULL;
3733 		    acqrec = spareacq) {
3734 			spareacq = acqrec->ipsacq_next;
3735 			if (current > acqrec->ipsacq_expire)
3736 				sadb_destroy_acquire(acqrec, ns);
3737 		}
3738 		mutex_exit(&acqlist->iacqf_lock);
3739 	}
3740 
3741 	/* Age inbound associations. */
3742 	for (i = 0; i < sp->sdb_hashsize; i++) {
3743 		bucket = &(sp->sdb_if[i]);
3744 		mutex_enter(&bucket->isaf_lock);
3745 		for (assoc = bucket->isaf_ipsa; assoc != NULL;
3746 		    assoc = spare) {
3747 			spare = assoc->ipsa_next;
3748 			if (sadb_age_assoc(bucket, pfkey_q, assoc, current,
3749 			    reap_delay, B_TRUE, &mq) != NULL) {
3750 				/*
3751 				 * sadb_age_assoc() increments the refcnt,
3752 				 * effectively doing an IPSA_REFHOLD().
3753 				 */
3754 				newbie = kmem_alloc(sizeof (*newbie),
3755 				    KM_NOSLEEP);
3756 				if (newbie == NULL) {
3757 					/*
3758 					 * Don't forget to REFRELE().
3759 					 */
3760 					IPSA_REFRELE(assoc);
3761 					continue;	/* for loop... */
3762 				}
3763 				newbie->next = haspeerlist;
3764 				newbie->ipsa = assoc;
3765 				haspeerlist = newbie;
3766 			}
3767 		}
3768 		mutex_exit(&bucket->isaf_lock);
3769 	}
3770 
3771 	if (mq != NULL) {
3772 		sadb_drain_torchq(ip_q, mq);
3773 		mq = NULL;
3774 	}
3775 	/*
3776 	 * Haspeer cases will contain both IPv4 and IPv6.  This code
3777 	 * is address independent.
3778 	 */
3779 	while (haspeerlist != NULL) {
3780 		/* "spare" contains the SA that has a peer. */
3781 		spare = haspeerlist->ipsa;
3782 		newbie = haspeerlist;
3783 		haspeerlist = newbie->next;
3784 		kmem_free(newbie, sizeof (*newbie));
3785 		/*
3786 		 * Pick peer bucket based on addrfam.
3787 		 */
3788 		if (spare->ipsa_addrfam == AF_INET6) {
3789 			outhash = OUTBOUND_HASH_V6(sp,
3790 			    *((in6_addr_t *)&spare->ipsa_dstaddr));
3791 		} else {
3792 			outhash = OUTBOUND_HASH_V4(sp,
3793 			    *((ipaddr_t *)&spare->ipsa_dstaddr));
3794 		}
3795 		bucket = &(sp->sdb_of[outhash]);
3796 
3797 		mutex_enter(&bucket->isaf_lock);
3798 		assoc = ipsec_getassocbyspi(bucket, spare->ipsa_spi,
3799 		    spare->ipsa_srcaddr, spare->ipsa_dstaddr,
3800 		    spare->ipsa_addrfam);
3801 		mutex_exit(&bucket->isaf_lock);
3802 		if (assoc != NULL) {
3803 			mutex_enter(&assoc->ipsa_lock);
3804 			mutex_enter(&spare->ipsa_lock);
3805 			assoc->ipsa_state = spare->ipsa_state;
3806 			if (assoc->ipsa_state == IPSA_STATE_DEAD)
3807 				assoc->ipsa_hardexpiretime = 1;
3808 			mutex_exit(&spare->ipsa_lock);
3809 			mutex_exit(&assoc->ipsa_lock);
3810 			IPSA_REFRELE(assoc);
3811 		}
3812 		IPSA_REFRELE(spare);
3813 	}
3814 
3815 	/* Age outbound associations. */
3816 	for (i = 0; i < sp->sdb_hashsize; i++) {
3817 		bucket = &(sp->sdb_of[i]);
3818 		mutex_enter(&bucket->isaf_lock);
3819 		for (assoc = bucket->isaf_ipsa; assoc != NULL;
3820 		    assoc = spare) {
3821 			spare = assoc->ipsa_next;
3822 			if (sadb_age_assoc(bucket, pfkey_q, assoc, current,
3823 			    reap_delay, B_FALSE, &mq) != NULL) {
3824 				/*
3825 				 * sadb_age_assoc() increments the refcnt,
3826 				 * effectively doing an IPSA_REFHOLD().
3827 				 */
3828 				newbie = kmem_alloc(sizeof (*newbie),
3829 				    KM_NOSLEEP);
3830 				if (newbie == NULL) {
3831 					/*
3832 					 * Don't forget to REFRELE().
3833 					 */
3834 					IPSA_REFRELE(assoc);
3835 					continue;	/* for loop... */
3836 				}
3837 				newbie->next = haspeerlist;
3838 				newbie->ipsa = assoc;
3839 				haspeerlist = newbie;
3840 			}
3841 		}
3842 		mutex_exit(&bucket->isaf_lock);
3843 	}
3844 	if (mq != NULL) {
3845 		sadb_drain_torchq(ip_q, mq);
3846 		mq = NULL;
3847 	}
3848 	/*
3849 	 * Haspeer cases will contain both IPv4 and IPv6.  This code
3850 	 * is address independent.
3851 	 */
3852 	while (haspeerlist != NULL) {
3853 		/* "spare" contains the SA that has a peer. */
3854 		spare = haspeerlist->ipsa;
3855 		newbie = haspeerlist;
3856 		haspeerlist = newbie->next;
3857 		kmem_free(newbie, sizeof (*newbie));
3858 		/*
3859 		 * Pick peer bucket based on addrfam.
3860 		 */
3861 		bucket = INBOUND_BUCKET(sp, spare->ipsa_spi);
3862 		mutex_enter(&bucket->isaf_lock);
3863 		assoc = ipsec_getassocbyspi(bucket, spare->ipsa_spi,
3864 		    spare->ipsa_srcaddr, spare->ipsa_dstaddr,
3865 		    spare->ipsa_addrfam);
3866 		mutex_exit(&bucket->isaf_lock);
3867 		if (assoc != NULL) {
3868 			mutex_enter(&assoc->ipsa_lock);
3869 			mutex_enter(&spare->ipsa_lock);
3870 			assoc->ipsa_state = spare->ipsa_state;
3871 			if (assoc->ipsa_state == IPSA_STATE_DEAD)
3872 				assoc->ipsa_hardexpiretime = 1;
3873 			mutex_exit(&spare->ipsa_lock);
3874 			mutex_exit(&assoc->ipsa_lock);
3875 			IPSA_REFRELE(assoc);
3876 		}
3877 		IPSA_REFRELE(spare);
3878 	}
3879 	/*
3880 	 * Run a GC pass to clean out dead identities.
3881 	 */
3882 	ipsid_gc(ns);
3883 }
3884 
3885 /*
3886  * Figure out when to reschedule the ager.
3887  */
3888 timeout_id_t
3889 sadb_retimeout(hrtime_t begin, queue_t *pfkey_q, void (*ager)(void *),
3890     void *agerarg, uint_t *intp, uint_t intmax, short mid)
3891 {
3892 	hrtime_t end = gethrtime();
3893 	uint_t interval = *intp;
3894 
3895 	/*
3896 	 * See how long this took.  If it took too long, increase the
3897 	 * aging interval.
3898 	 */
3899 	if ((end - begin) > interval * 1000000) {
3900 		if (interval >= intmax) {
3901 			/* XXX Rate limit this?  Or recommend flush? */
3902 			(void) strlog(mid, 0, 0, SL_ERROR | SL_WARN,
3903 			    "Too many SA's to age out in %d msec.\n",
3904 			    intmax);
3905 		} else {
3906 			/* Double by shifting by one bit. */
3907 			interval <<= 1;
3908 			interval = min(interval, intmax);
3909 		}
3910 	} else if ((end - begin) <= interval * 500000 &&
3911 	    interval > SADB_AGE_INTERVAL_DEFAULT) {
3912 		/*
3913 		 * If I took less than half of the interval, then I should
3914 		 * ratchet the interval back down.  Never automatically
3915 		 * shift below the default aging interval.
3916 		 *
3917 		 * NOTE:This even overrides manual setting of the age
3918 		 *	interval using NDD.
3919 		 */
3920 		/* Halve by shifting one bit. */
3921 		interval >>= 1;
3922 		interval = max(interval, SADB_AGE_INTERVAL_DEFAULT);
3923 	}
3924 	*intp = interval;
3925 	return (qtimeout(pfkey_q, ager, agerarg,
3926 	    interval * drv_usectohz(1000)));
3927 }
3928 
3929 
3930 /*
3931  * Update the lifetime values of an SA.	 This is the path an SADB_UPDATE
3932  * message takes when updating a MATURE or DYING SA.
3933  */
3934 static void
3935 sadb_update_lifetimes(ipsa_t *assoc, sadb_lifetime_t *hard,
3936     sadb_lifetime_t *soft)
3937 {
3938 	mutex_enter(&assoc->ipsa_lock);
3939 
3940 	assoc->ipsa_state = IPSA_STATE_MATURE;
3941 
3942 	/*
3943 	 * XXX RFC 2367 mentions how an SADB_EXT_LIFETIME_CURRENT can be
3944 	 * passed in during an update message.	We currently don't handle
3945 	 * these.
3946 	 */
3947 
3948 	if (hard != NULL) {
3949 		if (hard->sadb_lifetime_bytes != 0)
3950 			assoc->ipsa_hardbyteslt = hard->sadb_lifetime_bytes;
3951 		if (hard->sadb_lifetime_usetime != 0)
3952 			assoc->ipsa_harduselt = hard->sadb_lifetime_usetime;
3953 		if (hard->sadb_lifetime_addtime != 0)
3954 			assoc->ipsa_hardaddlt = hard->sadb_lifetime_addtime;
3955 		if (assoc->ipsa_hardaddlt != 0) {
3956 			assoc->ipsa_hardexpiretime =
3957 			    assoc->ipsa_addtime + assoc->ipsa_hardaddlt;
3958 		}
3959 		if (assoc->ipsa_harduselt != 0) {
3960 			if (assoc->ipsa_hardexpiretime != 0) {
3961 				assoc->ipsa_hardexpiretime =
3962 				    min(assoc->ipsa_hardexpiretime,
3963 				    assoc->ipsa_usetime +
3964 				    assoc->ipsa_harduselt);
3965 			} else {
3966 				assoc->ipsa_hardexpiretime =
3967 				    assoc->ipsa_usetime + assoc->ipsa_harduselt;
3968 			}
3969 		}
3970 
3971 		if (hard->sadb_lifetime_allocations != 0)
3972 			assoc->ipsa_hardalloc = hard->sadb_lifetime_allocations;
3973 	}
3974 
3975 	if (soft != NULL) {
3976 		if (soft->sadb_lifetime_bytes != 0)
3977 			assoc->ipsa_softbyteslt = soft->sadb_lifetime_bytes;
3978 		if (soft->sadb_lifetime_usetime != 0)
3979 			assoc->ipsa_softuselt = soft->sadb_lifetime_usetime;
3980 		if (soft->sadb_lifetime_addtime != 0)
3981 			assoc->ipsa_softaddlt = soft->sadb_lifetime_addtime;
3982 		if (assoc->ipsa_softaddlt != 0) {
3983 			assoc->ipsa_softexpiretime =
3984 			    assoc->ipsa_addtime + assoc->ipsa_softaddlt;
3985 		}
3986 		if (assoc->ipsa_softuselt != 0) {
3987 			if (assoc->ipsa_softexpiretime != 0) {
3988 				assoc->ipsa_softexpiretime =
3989 				    min(assoc->ipsa_softexpiretime,
3990 				    assoc->ipsa_usetime +
3991 				    assoc->ipsa_softuselt);
3992 			} else {
3993 				assoc->ipsa_softexpiretime =
3994 				    assoc->ipsa_usetime + assoc->ipsa_softuselt;
3995 			}
3996 		}
3997 
3998 		if (soft->sadb_lifetime_allocations != 0)
3999 			assoc->ipsa_softalloc = soft->sadb_lifetime_allocations;
4000 	}
4001 
4002 	mutex_exit(&assoc->ipsa_lock);
4003 }
4004 
4005 /*
4006  * Common code to update an SA.
4007  */
4008 
4009 int
4010 sadb_update_sa(mblk_t *mp, keysock_in_t *ksi,
4011     sadb_t *sp, int *diagnostic, queue_t *pfkey_q,
4012     int (*add_sa_func)(mblk_t *, keysock_in_t *, int *, netstack_t *),
4013     netstack_t *ns)
4014 {
4015 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
4016 	sadb_address_t *srcext =
4017 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
4018 	sadb_address_t *dstext =
4019 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
4020 	sadb_x_kmc_t *kmcext =
4021 	    (sadb_x_kmc_t *)ksi->ks_in_extv[SADB_X_EXT_KM_COOKIE];
4022 	sadb_key_t *akey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_AUTH];
4023 	sadb_key_t *ekey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT];
4024 	struct sockaddr_in *src, *dst;
4025 	struct sockaddr_in6 *src6, *dst6;
4026 	sadb_lifetime_t *soft =
4027 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_SOFT];
4028 	sadb_lifetime_t *hard =
4029 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_HARD];
4030 	isaf_t *inbound, *outbound;
4031 	ipsa_t *outbound_target = NULL, *inbound_target = NULL;
4032 	int error = 0;
4033 	uint32_t *srcaddr, *dstaddr;
4034 	sa_family_t af;
4035 	uint32_t kmp = 0, kmc = 0;
4036 
4037 	/* I need certain extensions present for either UPDATE message. */
4038 	if (srcext == NULL) {
4039 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
4040 		return (EINVAL);
4041 	}
4042 	if (dstext == NULL) {
4043 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
4044 		return (EINVAL);
4045 	}
4046 	if (assoc == NULL) {
4047 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
4048 		return (EINVAL);
4049 	}
4050 
4051 	if (kmcext != NULL) {
4052 		kmp = kmcext->sadb_x_kmc_proto;
4053 		kmc = kmcext->sadb_x_kmc_cookie;
4054 	}
4055 
4056 	dst = (struct sockaddr_in *)(dstext + 1);
4057 	src = (struct sockaddr_in *)(srcext + 1);
4058 	af = dst->sin_family;
4059 	if (af == AF_INET6) {
4060 		dst6 = (struct sockaddr_in6 *)dst;
4061 		src6 = (struct sockaddr_in6 *)src;
4062 
4063 		srcaddr = (uint32_t *)&src6->sin6_addr;
4064 		dstaddr = (uint32_t *)&dst6->sin6_addr;
4065 		outbound = OUTBOUND_BUCKET_V6(sp, *(uint32_t *)dstaddr);
4066 	} else {
4067 		srcaddr = (uint32_t *)&src->sin_addr;
4068 		dstaddr = (uint32_t *)&dst->sin_addr;
4069 		outbound = OUTBOUND_BUCKET_V4(sp, *(uint32_t *)dstaddr);
4070 	}
4071 	inbound = INBOUND_BUCKET(sp, assoc->sadb_sa_spi);
4072 
4073 	/* Lock down both buckets. */
4074 	mutex_enter(&outbound->isaf_lock);
4075 	mutex_enter(&inbound->isaf_lock);
4076 
4077 	/* Try outbound first. */
4078 	outbound_target = ipsec_getassocbyspi(outbound, assoc->sadb_sa_spi,
4079 	    srcaddr, dstaddr, af);
4080 	inbound_target = ipsec_getassocbyspi(inbound, assoc->sadb_sa_spi,
4081 	    srcaddr, dstaddr, af);
4082 
4083 	mutex_exit(&inbound->isaf_lock);
4084 	mutex_exit(&outbound->isaf_lock);
4085 
4086 	if (outbound_target == NULL) {
4087 		if (inbound_target == NULL) {
4088 			return (ESRCH);
4089 		} else if (inbound_target->ipsa_state == IPSA_STATE_LARVAL) {
4090 			/*
4091 			 * REFRELE the target and let the add_sa_func()
4092 			 * deal with updating a larval SA.
4093 			 */
4094 			IPSA_REFRELE(inbound_target);
4095 			return (add_sa_func(mp, ksi, diagnostic, ns));
4096 		}
4097 	}
4098 
4099 	/*
4100 	 * Reality checks for updates of active associations.
4101 	 * Sundry first-pass UPDATE-specific reality checks.
4102 	 * Have to do the checks here, because it's after the add_sa code.
4103 	 * XXX STATS : logging/stats here?
4104 	 */
4105 
4106 	if (assoc->sadb_sa_state != SADB_SASTATE_MATURE) {
4107 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4108 		error = EINVAL;
4109 		goto bail;
4110 	}
4111 	if (assoc->sadb_sa_flags & ~(SADB_SAFLAGS_NOREPLAY |
4112 	    SADB_X_SAFLAGS_NATT_LOC | SADB_X_SAFLAGS_NATT_REM)) {
4113 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_SAFLAGS;
4114 		error = EINVAL;
4115 		goto bail;
4116 	}
4117 	if (ksi->ks_in_extv[SADB_EXT_LIFETIME_CURRENT] != NULL) {
4118 		error = EOPNOTSUPP;
4119 		goto bail;
4120 	}
4121 	if ((*diagnostic = sadb_hardsoftchk(hard, soft)) != 0) {
4122 		error = EINVAL;
4123 		goto bail;
4124 	}
4125 	ASSERT(src->sin_family == dst->sin_family);
4126 	if (akey != NULL) {
4127 		*diagnostic = SADB_X_DIAGNOSTIC_AKEY_PRESENT;
4128 		error = EINVAL;
4129 		goto bail;
4130 	}
4131 	if (ekey != NULL) {
4132 		*diagnostic = SADB_X_DIAGNOSTIC_EKEY_PRESENT;
4133 		error = EINVAL;
4134 		goto bail;
4135 	}
4136 
4137 	if (outbound_target != NULL) {
4138 		if (outbound_target->ipsa_state == IPSA_STATE_DEAD) {
4139 			error = ESRCH;	/* DEAD == Not there, in this case. */
4140 			goto bail;
4141 		}
4142 		if ((kmp != 0) &&
4143 		    ((outbound_target->ipsa_kmp != 0) ||
4144 		    (outbound_target->ipsa_kmp != kmp))) {
4145 			*diagnostic = SADB_X_DIAGNOSTIC_DUPLICATE_KMP;
4146 			error = EINVAL;
4147 			goto bail;
4148 		}
4149 		if ((kmc != 0) &&
4150 		    ((outbound_target->ipsa_kmc != 0) ||
4151 		    (outbound_target->ipsa_kmc != kmc))) {
4152 			*diagnostic = SADB_X_DIAGNOSTIC_DUPLICATE_KMC;
4153 			error = EINVAL;
4154 			goto bail;
4155 		}
4156 	}
4157 
4158 	if (inbound_target != NULL) {
4159 		if (inbound_target->ipsa_state == IPSA_STATE_DEAD) {
4160 			error = ESRCH;	/* DEAD == Not there, in this case. */
4161 			goto bail;
4162 		}
4163 		if ((kmp != 0) &&
4164 		    ((inbound_target->ipsa_kmp != 0) ||
4165 		    (inbound_target->ipsa_kmp != kmp))) {
4166 			*diagnostic = SADB_X_DIAGNOSTIC_DUPLICATE_KMP;
4167 			error = EINVAL;
4168 			goto bail;
4169 		}
4170 		if ((kmc != 0) &&
4171 		    ((inbound_target->ipsa_kmc != 0) ||
4172 		    (inbound_target->ipsa_kmc != kmc))) {
4173 			*diagnostic = SADB_X_DIAGNOSTIC_DUPLICATE_KMC;
4174 			error = EINVAL;
4175 			goto bail;
4176 		}
4177 	}
4178 
4179 	if (outbound_target != NULL) {
4180 		sadb_update_lifetimes(outbound_target, hard, soft);
4181 		if (kmp != 0)
4182 			outbound_target->ipsa_kmp = kmp;
4183 		if (kmc != 0)
4184 			outbound_target->ipsa_kmc = kmc;
4185 	}
4186 
4187 	if (inbound_target != NULL) {
4188 		sadb_update_lifetimes(inbound_target, hard, soft);
4189 		if (kmp != 0)
4190 			inbound_target->ipsa_kmp = kmp;
4191 		if (kmc != 0)
4192 			inbound_target->ipsa_kmc = kmc;
4193 	}
4194 
4195 	sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)mp->b_cont->b_rptr,
4196 	    ksi, (outbound_target == NULL) ? inbound_target : outbound_target);
4197 
4198 bail:
4199 	/*
4200 	 * Because of the multi-line macro nature of IPSA_REFRELE, keep
4201 	 * them in { }.
4202 	 */
4203 	if (outbound_target != NULL) {
4204 		IPSA_REFRELE(outbound_target);
4205 	}
4206 	if (inbound_target != NULL) {
4207 		IPSA_REFRELE(inbound_target);
4208 	}
4209 
4210 	return (error);
4211 }
4212 
4213 /*
4214  * The following functions deal with ACQUIRE LISTS.  An ACQUIRE list is
4215  * a list of outstanding SADB_ACQUIRE messages.	 If ipsec_getassocbyconn() fails
4216  * for an outbound datagram, that datagram is queued up on an ACQUIRE record,
4217  * and an SADB_ACQUIRE message is sent up.  Presumably, a user-space key
4218  * management daemon will process the ACQUIRE, use a SADB_GETSPI to reserve
4219  * an SPI value and a larval SA, then SADB_UPDATE the larval SA, and ADD the
4220  * other direction's SA.
4221  */
4222 
4223 /*
4224  * Check the ACQUIRE lists.  If there's an existing ACQUIRE record,
4225  * grab it, lock it, and return it.  Otherwise return NULL.
4226  */
4227 static ipsacq_t *
4228 sadb_checkacquire(iacqf_t *bucket, ipsec_action_t *ap, ipsec_policy_t *pp,
4229     uint32_t *src, uint32_t *dst, uint32_t *isrc, uint32_t *idst,
4230     uint64_t unique_id)
4231 {
4232 	ipsacq_t *walker;
4233 	sa_family_t fam;
4234 	uint32_t blank_address[4] = {0, 0, 0, 0};
4235 
4236 	if (isrc == NULL) {
4237 		ASSERT(idst == NULL);
4238 		isrc = idst = blank_address;
4239 	}
4240 
4241 	/*
4242 	 * Scan list for duplicates.  Check for UNIQUE, src/dest, policy.
4243 	 *
4244 	 * XXX May need search for duplicates based on other things too!
4245 	 */
4246 	for (walker = bucket->iacqf_ipsacq; walker != NULL;
4247 	    walker = walker->ipsacq_next) {
4248 		mutex_enter(&walker->ipsacq_lock);
4249 		fam = walker->ipsacq_addrfam;
4250 		if (IPSA_ARE_ADDR_EQUAL(dst, walker->ipsacq_dstaddr, fam) &&
4251 		    IPSA_ARE_ADDR_EQUAL(src, walker->ipsacq_srcaddr, fam) &&
4252 		    ip_addr_match((uint8_t *)isrc, walker->ipsacq_innersrcpfx,
4253 		    (in6_addr_t *)walker->ipsacq_innersrc) &&
4254 		    ip_addr_match((uint8_t *)idst, walker->ipsacq_innerdstpfx,
4255 		    (in6_addr_t *)walker->ipsacq_innerdst) &&
4256 		    (ap == walker->ipsacq_act) &&
4257 		    (pp == walker->ipsacq_policy) &&
4258 		    /* XXX do deep compares of ap/pp? */
4259 		    (unique_id == walker->ipsacq_unique_id))
4260 			break;			/* everything matched */
4261 		mutex_exit(&walker->ipsacq_lock);
4262 	}
4263 
4264 	return (walker);
4265 }
4266 
4267 /*
4268  * For this mblk, insert a new acquire record.  Assume bucket contains addrs
4269  * of all of the same length.  Give up (and drop) if memory
4270  * cannot be allocated for a new one; otherwise, invoke callback to
4271  * send the acquire up..
4272  *
4273  * In cases where we need both AH and ESP, add the SA to the ESP ACQUIRE
4274  * list.  The ah_add_sa_finish() routines can look at the packet's ipsec_out_t
4275  * and handle this case specially.
4276  */
4277 void
4278 sadb_acquire(mblk_t *mp, ipsec_out_t *io, boolean_t need_ah, boolean_t need_esp)
4279 {
4280 	sadbp_t *spp;
4281 	sadb_t *sp;
4282 	ipsacq_t *newbie;
4283 	iacqf_t *bucket;
4284 	mblk_t *datamp = mp->b_cont;
4285 	mblk_t *extended;
4286 	ipha_t *ipha = (ipha_t *)datamp->b_rptr;
4287 	ip6_t *ip6h = (ip6_t *)datamp->b_rptr;
4288 	uint32_t *src, *dst, *isrc, *idst;
4289 	ipsec_policy_t *pp = io->ipsec_out_policy;
4290 	ipsec_action_t *ap = io->ipsec_out_act;
4291 	sa_family_t af;
4292 	int hashoffset;
4293 	uint32_t seq;
4294 	uint64_t unique_id = 0;
4295 	ipsec_selector_t sel;
4296 	boolean_t tunnel_mode = io->ipsec_out_tunnel;
4297 	netstack_t	*ns = io->ipsec_out_ns;
4298 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
4299 
4300 	ASSERT((pp != NULL) || (ap != NULL));
4301 
4302 	ASSERT(need_ah != NULL || need_esp != NULL);
4303 	/* Assign sadb pointers */
4304 	if (need_esp) { /* ESP for AH+ESP */
4305 		ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
4306 
4307 		spp = &espstack->esp_sadb;
4308 	} else {
4309 		ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
4310 
4311 		spp = &ahstack->ah_sadb;
4312 	}
4313 	sp = io->ipsec_out_v4 ? &spp->s_v4 : &spp->s_v6;
4314 
4315 	if (ap == NULL)
4316 		ap = pp->ipsp_act;
4317 
4318 	ASSERT(ap != NULL);
4319 
4320 	if (ap->ipa_act.ipa_apply.ipp_use_unique || tunnel_mode)
4321 		unique_id = SA_FORM_UNIQUE_ID(io);
4322 
4323 	/*
4324 	 * Set up an ACQUIRE record.
4325 	 *
4326 	 * Immediately, make sure the ACQUIRE sequence number doesn't slip
4327 	 * below the lowest point allowed in the kernel.  (In other words,
4328 	 * make sure the high bit on the sequence number is set.)
4329 	 */
4330 
4331 	seq = keysock_next_seq(ns) | IACQF_LOWEST_SEQ;
4332 
4333 	if (IPH_HDR_VERSION(ipha) == IP_VERSION) {
4334 		src = (uint32_t *)&ipha->ipha_src;
4335 		dst = (uint32_t *)&ipha->ipha_dst;
4336 		af = AF_INET;
4337 		hashoffset = OUTBOUND_HASH_V4(sp, ipha->ipha_dst);
4338 		ASSERT(io->ipsec_out_v4 == B_TRUE);
4339 	} else {
4340 		ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION);
4341 		src = (uint32_t *)&ip6h->ip6_src;
4342 		dst = (uint32_t *)&ip6h->ip6_dst;
4343 		af = AF_INET6;
4344 		hashoffset = OUTBOUND_HASH_V6(sp, ip6h->ip6_dst);
4345 		ASSERT(io->ipsec_out_v4 == B_FALSE);
4346 	}
4347 
4348 	if (tunnel_mode) {
4349 		/* Snag inner addresses. */
4350 		isrc = io->ipsec_out_insrc;
4351 		idst = io->ipsec_out_indst;
4352 	} else {
4353 		isrc = idst = NULL;
4354 	}
4355 
4356 	/*
4357 	 * Check buckets to see if there is an existing entry.  If so,
4358 	 * grab it.  sadb_checkacquire locks newbie if found.
4359 	 */
4360 	bucket = &(sp->sdb_acq[hashoffset]);
4361 	mutex_enter(&bucket->iacqf_lock);
4362 	newbie = sadb_checkacquire(bucket, ap, pp, src, dst, isrc, idst,
4363 	    unique_id);
4364 
4365 	if (newbie == NULL) {
4366 		/*
4367 		 * Otherwise, allocate a new one.
4368 		 */
4369 		newbie = kmem_zalloc(sizeof (*newbie), KM_NOSLEEP);
4370 		if (newbie == NULL) {
4371 			mutex_exit(&bucket->iacqf_lock);
4372 			ip_drop_packet(mp, B_FALSE, NULL, NULL,
4373 			    DROPPER(ipss, ipds_sadb_acquire_nomem),
4374 			    &ipss->ipsec_sadb_dropper);
4375 			return;
4376 		}
4377 		newbie->ipsacq_policy = pp;
4378 		if (pp != NULL) {
4379 			IPPOL_REFHOLD(pp);
4380 		}
4381 		IPACT_REFHOLD(ap);
4382 		newbie->ipsacq_act = ap;
4383 		newbie->ipsacq_linklock = &bucket->iacqf_lock;
4384 		newbie->ipsacq_next = bucket->iacqf_ipsacq;
4385 		newbie->ipsacq_ptpn = &bucket->iacqf_ipsacq;
4386 		if (newbie->ipsacq_next != NULL)
4387 			newbie->ipsacq_next->ipsacq_ptpn = &newbie->ipsacq_next;
4388 		bucket->iacqf_ipsacq = newbie;
4389 		mutex_init(&newbie->ipsacq_lock, NULL, MUTEX_DEFAULT, NULL);
4390 		mutex_enter(&newbie->ipsacq_lock);
4391 	}
4392 
4393 	mutex_exit(&bucket->iacqf_lock);
4394 
4395 	/*
4396 	 * This assert looks silly for now, but we may need to enter newbie's
4397 	 * mutex during a search.
4398 	 */
4399 	ASSERT(MUTEX_HELD(&newbie->ipsacq_lock));
4400 
4401 	mp->b_next = NULL;
4402 	/* Queue up packet.  Use b_next. */
4403 	if (newbie->ipsacq_numpackets == 0) {
4404 		/* First one. */
4405 		newbie->ipsacq_mp = mp;
4406 		newbie->ipsacq_numpackets = 1;
4407 		newbie->ipsacq_expire = gethrestime_sec();
4408 		/*
4409 		 * Extended ACQUIRE with both AH+ESP will use ESP's timeout
4410 		 * value.
4411 		 */
4412 		newbie->ipsacq_expire += *spp->s_acquire_timeout;
4413 		newbie->ipsacq_seq = seq;
4414 		newbie->ipsacq_addrfam = af;
4415 
4416 		newbie->ipsacq_srcport = io->ipsec_out_src_port;
4417 		newbie->ipsacq_dstport = io->ipsec_out_dst_port;
4418 		newbie->ipsacq_icmp_type = io->ipsec_out_icmp_type;
4419 		newbie->ipsacq_icmp_code = io->ipsec_out_icmp_code;
4420 		if (tunnel_mode) {
4421 			newbie->ipsacq_inneraddrfam = io->ipsec_out_inaf;
4422 			newbie->ipsacq_proto = io->ipsec_out_inaf == AF_INET6 ?
4423 			    IPPROTO_IPV6 : IPPROTO_ENCAP;
4424 			newbie->ipsacq_innersrcpfx = io->ipsec_out_insrcpfx;
4425 			newbie->ipsacq_innerdstpfx = io->ipsec_out_indstpfx;
4426 			IPSA_COPY_ADDR(newbie->ipsacq_innersrc,
4427 			    io->ipsec_out_insrc, io->ipsec_out_inaf);
4428 			IPSA_COPY_ADDR(newbie->ipsacq_innerdst,
4429 			    io->ipsec_out_indst, io->ipsec_out_inaf);
4430 		} else {
4431 			newbie->ipsacq_proto = io->ipsec_out_proto;
4432 		}
4433 		newbie->ipsacq_unique_id = unique_id;
4434 	} else {
4435 		/* Scan to the end of the list & insert. */
4436 		mblk_t *lastone = newbie->ipsacq_mp;
4437 
4438 		while (lastone->b_next != NULL)
4439 			lastone = lastone->b_next;
4440 		lastone->b_next = mp;
4441 		if (newbie->ipsacq_numpackets++ == ipsacq_maxpackets) {
4442 			newbie->ipsacq_numpackets = ipsacq_maxpackets;
4443 			lastone = newbie->ipsacq_mp;
4444 			newbie->ipsacq_mp = lastone->b_next;
4445 			lastone->b_next = NULL;
4446 			ip_drop_packet(lastone, B_FALSE, NULL, NULL,
4447 			    DROPPER(ipss, ipds_sadb_acquire_toofull),
4448 			    &ipss->ipsec_sadb_dropper);
4449 		} else {
4450 			IP_ACQUIRE_STAT(ipss, qhiwater,
4451 			    newbie->ipsacq_numpackets);
4452 		}
4453 	}
4454 
4455 	/*
4456 	 * Reset addresses.  Set them to the most recently added mblk chain,
4457 	 * so that the address pointers in the acquire record will point
4458 	 * at an mblk still attached to the acquire list.
4459 	 */
4460 
4461 	newbie->ipsacq_srcaddr = src;
4462 	newbie->ipsacq_dstaddr = dst;
4463 
4464 	/*
4465 	 * If the acquire record has more than one queued packet, we've
4466 	 * already sent an ACQUIRE, and don't need to repeat ourself.
4467 	 */
4468 	if (newbie->ipsacq_seq != seq || newbie->ipsacq_numpackets > 1) {
4469 		/* I have an acquire outstanding already! */
4470 		mutex_exit(&newbie->ipsacq_lock);
4471 		return;
4472 	}
4473 
4474 	if (keysock_extended_reg(ns)) {
4475 		/*
4476 		 * Construct an extended ACQUIRE.  There are logging
4477 		 * opportunities here in failure cases.
4478 		 */
4479 
4480 		(void) memset(&sel, 0, sizeof (sel));
4481 		sel.ips_isv4 = io->ipsec_out_v4;
4482 		if (tunnel_mode) {
4483 			sel.ips_protocol = (io->ipsec_out_inaf == AF_INET) ?
4484 			    IPPROTO_ENCAP : IPPROTO_IPV6;
4485 		} else {
4486 			sel.ips_protocol = io->ipsec_out_proto;
4487 			sel.ips_local_port = io->ipsec_out_src_port;
4488 			sel.ips_remote_port = io->ipsec_out_dst_port;
4489 		}
4490 		sel.ips_icmp_type = io->ipsec_out_icmp_type;
4491 		sel.ips_icmp_code = io->ipsec_out_icmp_code;
4492 		sel.ips_is_icmp_inv_acq = 0;
4493 		if (af == AF_INET) {
4494 			sel.ips_local_addr_v4 = ipha->ipha_src;
4495 			sel.ips_remote_addr_v4 = ipha->ipha_dst;
4496 		} else {
4497 			sel.ips_local_addr_v6 = ip6h->ip6_src;
4498 			sel.ips_remote_addr_v6 = ip6h->ip6_dst;
4499 		}
4500 
4501 		extended = sadb_keysock_out(0);
4502 		if (extended != NULL) {
4503 			extended->b_cont = sadb_extended_acquire(&sel, pp, ap,
4504 			    tunnel_mode, seq, 0, ns);
4505 			if (extended->b_cont == NULL) {
4506 				freeb(extended);
4507 				extended = NULL;
4508 			}
4509 		}
4510 	} else
4511 		extended = NULL;
4512 
4513 	/*
4514 	 * Send an ACQUIRE message (and possible an extended ACQUIRE) based on
4515 	 * this new record.  The send-acquire callback assumes that acqrec is
4516 	 * already locked.
4517 	 */
4518 	(*spp->s_acqfn)(newbie, extended, ns);
4519 }
4520 
4521 /*
4522  * Unlink and free an acquire record.
4523  */
4524 void
4525 sadb_destroy_acquire(ipsacq_t *acqrec, netstack_t *ns)
4526 {
4527 	mblk_t *mp;
4528 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
4529 
4530 	ASSERT(MUTEX_HELD(acqrec->ipsacq_linklock));
4531 
4532 	if (acqrec->ipsacq_policy != NULL) {
4533 		IPPOL_REFRELE(acqrec->ipsacq_policy, ns);
4534 	}
4535 	if (acqrec->ipsacq_act != NULL) {
4536 		IPACT_REFRELE(acqrec->ipsacq_act);
4537 	}
4538 
4539 	/* Unlink */
4540 	*(acqrec->ipsacq_ptpn) = acqrec->ipsacq_next;
4541 	if (acqrec->ipsacq_next != NULL)
4542 		acqrec->ipsacq_next->ipsacq_ptpn = acqrec->ipsacq_ptpn;
4543 
4544 	/*
4545 	 * Free hanging mp's.
4546 	 *
4547 	 * XXX Instead of freemsg(), perhaps use IPSEC_REQ_FAILED.
4548 	 */
4549 
4550 	mutex_enter(&acqrec->ipsacq_lock);
4551 	while (acqrec->ipsacq_mp != NULL) {
4552 		mp = acqrec->ipsacq_mp;
4553 		acqrec->ipsacq_mp = mp->b_next;
4554 		mp->b_next = NULL;
4555 		ip_drop_packet(mp, B_FALSE, NULL, NULL,
4556 		    DROPPER(ipss, ipds_sadb_acquire_timeout),
4557 		    &ipss->ipsec_sadb_dropper);
4558 	}
4559 	mutex_exit(&acqrec->ipsacq_lock);
4560 
4561 	/* Free */
4562 	mutex_destroy(&acqrec->ipsacq_lock);
4563 	kmem_free(acqrec, sizeof (*acqrec));
4564 }
4565 
4566 /*
4567  * Destroy an acquire list fanout.
4568  */
4569 static void
4570 sadb_destroy_acqlist(iacqf_t **listp, uint_t numentries, boolean_t forever,
4571     netstack_t *ns)
4572 {
4573 	int i;
4574 	iacqf_t *list = *listp;
4575 
4576 	if (list == NULL)
4577 		return;
4578 
4579 	for (i = 0; i < numentries; i++) {
4580 		mutex_enter(&(list[i].iacqf_lock));
4581 		while (list[i].iacqf_ipsacq != NULL)
4582 			sadb_destroy_acquire(list[i].iacqf_ipsacq, ns);
4583 		mutex_exit(&(list[i].iacqf_lock));
4584 		if (forever)
4585 			mutex_destroy(&(list[i].iacqf_lock));
4586 	}
4587 
4588 	if (forever) {
4589 		*listp = NULL;
4590 		kmem_free(list, numentries * sizeof (*list));
4591 	}
4592 }
4593 
4594 /*
4595  * Create an algorithm descriptor for an extended ACQUIRE.  Filter crypto
4596  * framework's view of reality vs. IPsec's.  EF's wins, BTW.
4597  */
4598 static uint8_t *
4599 sadb_new_algdesc(uint8_t *start, uint8_t *limit,
4600     sadb_x_ecomb_t *ecomb, uint8_t satype, uint8_t algtype,
4601     uint8_t alg, uint16_t minbits, uint16_t maxbits, ipsec_stack_t *ipss)
4602 {
4603 	uint8_t *cur = start;
4604 	ipsec_alginfo_t *algp;
4605 	sadb_x_algdesc_t *algdesc = (sadb_x_algdesc_t *)cur;
4606 
4607 	cur += sizeof (*algdesc);
4608 	if (cur >= limit)
4609 		return (NULL);
4610 
4611 	ecomb->sadb_x_ecomb_numalgs++;
4612 
4613 	/*
4614 	 * Normalize vs. crypto framework's limits.  This way, you can specify
4615 	 * a stronger policy, and when the framework loads a stronger version,
4616 	 * you can just keep plowing w/o rewhacking your SPD.
4617 	 */
4618 	mutex_enter(&ipss->ipsec_alg_lock);
4619 	algp = ipss->ipsec_alglists[(algtype == SADB_X_ALGTYPE_AUTH) ?
4620 	    IPSEC_ALG_AUTH : IPSEC_ALG_ENCR][alg];
4621 	if (algp == NULL) {
4622 		mutex_exit(&ipss->ipsec_alg_lock);
4623 		return (NULL);	/* Algorithm doesn't exist.  Fail gracefully. */
4624 	}
4625 	if (minbits < algp->alg_ef_minbits)
4626 		minbits = algp->alg_ef_minbits;
4627 	if (maxbits > algp->alg_ef_maxbits)
4628 		maxbits = algp->alg_ef_maxbits;
4629 	mutex_exit(&ipss->ipsec_alg_lock);
4630 
4631 	algdesc->sadb_x_algdesc_satype = satype;
4632 	algdesc->sadb_x_algdesc_algtype = algtype;
4633 	algdesc->sadb_x_algdesc_alg = alg;
4634 	algdesc->sadb_x_algdesc_minbits = minbits;
4635 	algdesc->sadb_x_algdesc_maxbits = maxbits;
4636 	algdesc->sadb_x_algdesc_reserved = 0;
4637 	return (cur);
4638 }
4639 
4640 /*
4641  * Convert the given ipsec_action_t into an ecomb starting at *ecomb
4642  * which must fit before *limit
4643  *
4644  * return NULL if we ran out of room or a pointer to the end of the ecomb.
4645  */
4646 static uint8_t *
4647 sadb_action_to_ecomb(uint8_t *start, uint8_t *limit, ipsec_action_t *act,
4648     netstack_t *ns)
4649 {
4650 	uint8_t *cur = start;
4651 	sadb_x_ecomb_t *ecomb = (sadb_x_ecomb_t *)cur;
4652 	ipsec_prot_t *ipp;
4653 	ipsec_stack_t *ipss = ns->netstack_ipsec;
4654 
4655 	cur += sizeof (*ecomb);
4656 	if (cur >= limit)
4657 		return (NULL);
4658 
4659 	ASSERT(act->ipa_act.ipa_type == IPSEC_ACT_APPLY);
4660 
4661 	ipp = &act->ipa_act.ipa_apply;
4662 
4663 	ecomb->sadb_x_ecomb_numalgs = 0;
4664 	ecomb->sadb_x_ecomb_reserved = 0;
4665 	ecomb->sadb_x_ecomb_reserved2 = 0;
4666 	/*
4667 	 * No limits on allocations, since we really don't support that
4668 	 * concept currently.
4669 	 */
4670 	ecomb->sadb_x_ecomb_soft_allocations = 0;
4671 	ecomb->sadb_x_ecomb_hard_allocations = 0;
4672 
4673 	/*
4674 	 * XXX TBD: Policy or global parameters will eventually be
4675 	 * able to fill in some of these.
4676 	 */
4677 	ecomb->sadb_x_ecomb_flags = 0;
4678 	ecomb->sadb_x_ecomb_soft_bytes = 0;
4679 	ecomb->sadb_x_ecomb_hard_bytes = 0;
4680 	ecomb->sadb_x_ecomb_soft_addtime = 0;
4681 	ecomb->sadb_x_ecomb_hard_addtime = 0;
4682 	ecomb->sadb_x_ecomb_soft_usetime = 0;
4683 	ecomb->sadb_x_ecomb_hard_usetime = 0;
4684 
4685 	if (ipp->ipp_use_ah) {
4686 		cur = sadb_new_algdesc(cur, limit, ecomb,
4687 		    SADB_SATYPE_AH, SADB_X_ALGTYPE_AUTH, ipp->ipp_auth_alg,
4688 		    ipp->ipp_ah_minbits, ipp->ipp_ah_maxbits, ipss);
4689 		if (cur == NULL)
4690 			return (NULL);
4691 		ipsecah_fill_defs(ecomb, ns);
4692 	}
4693 
4694 	if (ipp->ipp_use_esp) {
4695 		if (ipp->ipp_use_espa) {
4696 			cur = sadb_new_algdesc(cur, limit, ecomb,
4697 			    SADB_SATYPE_ESP, SADB_X_ALGTYPE_AUTH,
4698 			    ipp->ipp_esp_auth_alg,
4699 			    ipp->ipp_espa_minbits,
4700 			    ipp->ipp_espa_maxbits, ipss);
4701 			if (cur == NULL)
4702 				return (NULL);
4703 		}
4704 
4705 		cur = sadb_new_algdesc(cur, limit, ecomb,
4706 		    SADB_SATYPE_ESP, SADB_X_ALGTYPE_CRYPT,
4707 		    ipp->ipp_encr_alg,
4708 		    ipp->ipp_espe_minbits,
4709 		    ipp->ipp_espe_maxbits, ipss);
4710 		if (cur == NULL)
4711 			return (NULL);
4712 		/* Fill in lifetimes if and only if AH didn't already... */
4713 		if (!ipp->ipp_use_ah)
4714 			ipsecesp_fill_defs(ecomb, ns);
4715 	}
4716 
4717 	return (cur);
4718 }
4719 
4720 /*
4721  * Construct an extended ACQUIRE message based on a selector and the resulting
4722  * IPsec action.
4723  *
4724  * NOTE: This is used by both inverse ACQUIRE and actual ACQUIRE
4725  * generation. As a consequence, expect this function to evolve
4726  * rapidly.
4727  */
4728 static mblk_t *
4729 sadb_extended_acquire(ipsec_selector_t *sel, ipsec_policy_t *pol,
4730     ipsec_action_t *act, boolean_t tunnel_mode, uint32_t seq, uint32_t pid,
4731     netstack_t *ns)
4732 {
4733 	mblk_t *mp;
4734 	sadb_msg_t *samsg;
4735 	uint8_t *start, *cur, *end;
4736 	uint32_t *saddrptr, *daddrptr;
4737 	sa_family_t af;
4738 	sadb_prop_t *eprop;
4739 	ipsec_action_t *ap, *an;
4740 	ipsec_selkey_t *ipsl;
4741 	uint8_t proto, pfxlen;
4742 	uint16_t lport, rport;
4743 	uint32_t kmp, kmc;
4744 
4745 	/*
4746 	 * Find the action we want sooner rather than later..
4747 	 */
4748 	an = NULL;
4749 	if (pol == NULL) {
4750 		ap = act;
4751 	} else {
4752 		ap = pol->ipsp_act;
4753 
4754 		if (ap != NULL)
4755 			an = ap->ipa_next;
4756 	}
4757 
4758 	/*
4759 	 * Just take a swag for the allocation for now.	 We can always
4760 	 * alter it later.
4761 	 */
4762 #define	SADB_EXTENDED_ACQUIRE_SIZE	2048
4763 	mp = allocb(SADB_EXTENDED_ACQUIRE_SIZE, BPRI_HI);
4764 	if (mp == NULL)
4765 		return (NULL);
4766 
4767 	start = mp->b_rptr;
4768 	end = start + SADB_EXTENDED_ACQUIRE_SIZE;
4769 
4770 	cur = start;
4771 
4772 	samsg = (sadb_msg_t *)cur;
4773 	cur += sizeof (*samsg);
4774 
4775 	samsg->sadb_msg_version = PF_KEY_V2;
4776 	samsg->sadb_msg_type = SADB_ACQUIRE;
4777 	samsg->sadb_msg_errno = 0;
4778 	samsg->sadb_msg_reserved = 0;
4779 	samsg->sadb_msg_satype = 0;
4780 	samsg->sadb_msg_seq = seq;
4781 	samsg->sadb_msg_pid = pid;
4782 
4783 	if (tunnel_mode) {
4784 		/*
4785 		 * Form inner address extensions based NOT on the inner
4786 		 * selectors (i.e. the packet data), but on the policy's
4787 		 * selector key (i.e. the policy's selector information).
4788 		 *
4789 		 * NOTE:  The position of IPv4 and IPv6 addresses is the
4790 		 * same in ipsec_selkey_t (unless the compiler does very
4791 		 * strange things with unions, consult your local C language
4792 		 * lawyer for details).
4793 		 */
4794 		ipsl = &(pol->ipsp_sel->ipsl_key);
4795 		if (ipsl->ipsl_valid & IPSL_IPV4) {
4796 			af = AF_INET;
4797 			ASSERT(sel->ips_protocol == IPPROTO_ENCAP);
4798 			ASSERT(!(ipsl->ipsl_valid & IPSL_IPV6));
4799 		} else {
4800 			af = AF_INET6;
4801 			ASSERT(sel->ips_protocol == IPPROTO_IPV6);
4802 			ASSERT(ipsl->ipsl_valid & IPSL_IPV6);
4803 		}
4804 
4805 		if (ipsl->ipsl_valid & IPSL_LOCAL_ADDR) {
4806 			saddrptr = (uint32_t *)(&ipsl->ipsl_local);
4807 			pfxlen = ipsl->ipsl_local_pfxlen;
4808 		} else {
4809 			saddrptr = (uint32_t *)(&ipv6_all_zeros);
4810 			pfxlen = 0;
4811 		}
4812 		/* XXX What about ICMP type/code? */
4813 		lport = (ipsl->ipsl_valid & IPSL_LOCAL_PORT) ?
4814 		    ipsl->ipsl_lport : 0;
4815 		proto = (ipsl->ipsl_valid & IPSL_PROTOCOL) ?
4816 		    ipsl->ipsl_proto : 0;
4817 
4818 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_SRC,
4819 		    af, saddrptr, lport, proto, pfxlen);
4820 		if (cur == NULL) {
4821 			freeb(mp);
4822 			return (NULL);
4823 		}
4824 
4825 		if (ipsl->ipsl_valid & IPSL_REMOTE_ADDR) {
4826 			daddrptr = (uint32_t *)(&ipsl->ipsl_remote);
4827 			pfxlen = ipsl->ipsl_remote_pfxlen;
4828 		} else {
4829 			daddrptr = (uint32_t *)(&ipv6_all_zeros);
4830 			pfxlen = 0;
4831 		}
4832 		/* XXX What about ICMP type/code? */
4833 		rport = (ipsl->ipsl_valid & IPSL_REMOTE_PORT) ?
4834 		    ipsl->ipsl_rport : 0;
4835 
4836 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_DST,
4837 		    af, daddrptr, rport, proto, pfxlen);
4838 		if (cur == NULL) {
4839 			freeb(mp);
4840 			return (NULL);
4841 		}
4842 		/*
4843 		 * TODO  - if we go to 3408's dream of transport mode IP-in-IP
4844 		 * _with_ inner-packet address selectors, we'll need to further
4845 		 * distinguish tunnel mode here.  For now, having inner
4846 		 * addresses and/or ports is sufficient.
4847 		 *
4848 		 * Meanwhile, whack proto/ports to reflect IP-in-IP for the
4849 		 * outer addresses.
4850 		 */
4851 		proto = sel->ips_protocol;	/* Either _ENCAP or _IPV6 */
4852 		lport = rport = 0;
4853 	} else if ((ap != NULL) && (!ap->ipa_want_unique)) {
4854 		proto = 0;
4855 		lport = 0;
4856 		rport = 0;
4857 		if (pol != NULL) {
4858 			ipsl = &(pol->ipsp_sel->ipsl_key);
4859 			if (ipsl->ipsl_valid & IPSL_PROTOCOL)
4860 				proto = ipsl->ipsl_proto;
4861 			if (ipsl->ipsl_valid & IPSL_REMOTE_PORT)
4862 				rport = ipsl->ipsl_rport;
4863 			if (ipsl->ipsl_valid & IPSL_LOCAL_PORT)
4864 				lport = ipsl->ipsl_lport;
4865 		}
4866 	} else {
4867 		proto = sel->ips_protocol;
4868 		lport = sel->ips_local_port;
4869 		rport = sel->ips_remote_port;
4870 	}
4871 
4872 	af = sel->ips_isv4 ? AF_INET : AF_INET6;
4873 
4874 	/*
4875 	 * NOTE:  The position of IPv4 and IPv6 addresses is the same in
4876 	 * ipsec_selector_t.
4877 	 */
4878 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_SRC, af,
4879 	    (uint32_t *)(&sel->ips_local_addr_v6), lport, proto, 0);
4880 
4881 	if (cur == NULL) {
4882 		freeb(mp);
4883 		return (NULL);
4884 	}
4885 
4886 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_DST, af,
4887 	    (uint32_t *)(&sel->ips_remote_addr_v6), rport, proto, 0);
4888 
4889 	if (cur == NULL) {
4890 		freeb(mp);
4891 		return (NULL);
4892 	}
4893 
4894 	/*
4895 	 * This section will change a lot as policy evolves.
4896 	 * For now, it'll be relatively simple.
4897 	 */
4898 	eprop = (sadb_prop_t *)cur;
4899 	cur += sizeof (*eprop);
4900 	if (cur > end) {
4901 		/* no space left */
4902 		freeb(mp);
4903 		return (NULL);
4904 	}
4905 
4906 	eprop->sadb_prop_exttype = SADB_X_EXT_EPROP;
4907 	eprop->sadb_x_prop_ereserved = 0;
4908 	eprop->sadb_x_prop_numecombs = 0;
4909 	eprop->sadb_prop_replay = 32;	/* default */
4910 
4911 	kmc = kmp = 0;
4912 
4913 	for (; ap != NULL; ap = an) {
4914 		an = (pol != NULL) ? ap->ipa_next : NULL;
4915 
4916 		/*
4917 		 * Skip non-IPsec policies
4918 		 */
4919 		if (ap->ipa_act.ipa_type != IPSEC_ACT_APPLY)
4920 			continue;
4921 
4922 		if (ap->ipa_act.ipa_apply.ipp_km_proto)
4923 			kmp = ap->ipa_act.ipa_apply.ipp_km_proto;
4924 		if (ap->ipa_act.ipa_apply.ipp_km_cookie)
4925 			kmc = ap->ipa_act.ipa_apply.ipp_km_cookie;
4926 		if (ap->ipa_act.ipa_apply.ipp_replay_depth) {
4927 			eprop->sadb_prop_replay =
4928 			    ap->ipa_act.ipa_apply.ipp_replay_depth;
4929 		}
4930 
4931 		cur = sadb_action_to_ecomb(cur, end, ap, ns);
4932 		if (cur == NULL) { /* no space */
4933 			freeb(mp);
4934 			return (NULL);
4935 		}
4936 		eprop->sadb_x_prop_numecombs++;
4937 	}
4938 
4939 	if (eprop->sadb_x_prop_numecombs == 0) {
4940 		/*
4941 		 * This will happen if we fail to find a policy
4942 		 * allowing for IPsec processing.
4943 		 * Construct an error message.
4944 		 */
4945 		samsg->sadb_msg_len = SADB_8TO64(sizeof (*samsg));
4946 		samsg->sadb_msg_errno = ENOENT;
4947 		samsg->sadb_x_msg_diagnostic = 0;
4948 		return (mp);
4949 	}
4950 
4951 	if ((kmp != 0) || (kmc != 0)) {
4952 		cur = sadb_make_kmc_ext(cur, end, kmp, kmc);
4953 		if (cur == NULL) {
4954 			freeb(mp);
4955 			return (NULL);
4956 		}
4957 	}
4958 
4959 	eprop->sadb_prop_len = SADB_8TO64(cur - (uint8_t *)eprop);
4960 	samsg->sadb_msg_len = SADB_8TO64(cur - start);
4961 	mp->b_wptr = cur;
4962 
4963 	return (mp);
4964 }
4965 
4966 /*
4967  * Generic setup of an RFC 2367 ACQUIRE message.  Caller sets satype.
4968  *
4969  * NOTE: This function acquires alg_lock as a side-effect if-and-only-if we
4970  * succeed (i.e. return non-NULL).  Caller MUST release it.  This is to
4971  * maximize code consolidation while preventing algorithm changes from messing
4972  * with the callers finishing touches on the ACQUIRE itself.
4973  */
4974 mblk_t *
4975 sadb_setup_acquire(ipsacq_t *acqrec, uint8_t satype, ipsec_stack_t *ipss)
4976 {
4977 	uint_t allocsize;
4978 	mblk_t *pfkeymp, *msgmp;
4979 	sa_family_t af;
4980 	uint8_t *cur, *end;
4981 	sadb_msg_t *samsg;
4982 	uint16_t sport_typecode;
4983 	uint16_t dport_typecode;
4984 	uint8_t check_proto;
4985 	boolean_t tunnel_mode = (acqrec->ipsacq_inneraddrfam != 0);
4986 
4987 	ASSERT(MUTEX_HELD(&acqrec->ipsacq_lock));
4988 
4989 	pfkeymp = sadb_keysock_out(0);
4990 	if (pfkeymp == NULL)
4991 		return (NULL);
4992 
4993 	/*
4994 	 * First, allocate a basic ACQUIRE message
4995 	 */
4996 	allocsize = sizeof (sadb_msg_t) + sizeof (sadb_address_t) +
4997 	    sizeof (sadb_address_t) + sizeof (sadb_prop_t);
4998 
4999 	/* Make sure there's enough to cover both AF_INET and AF_INET6. */
5000 	allocsize += 2 * sizeof (struct sockaddr_in6);
5001 
5002 	mutex_enter(&ipss->ipsec_alg_lock);
5003 	/* NOTE:  The lock is now held through to this function's return. */
5004 	allocsize += ipss->ipsec_nalgs[IPSEC_ALG_AUTH] *
5005 	    ipss->ipsec_nalgs[IPSEC_ALG_ENCR] * sizeof (sadb_comb_t);
5006 
5007 	if (tunnel_mode) {
5008 		/* Tunnel mode! */
5009 		allocsize += 2 * sizeof (sadb_address_t);
5010 		/* Enough to cover both AF_INET and AF_INET6. */
5011 		allocsize += 2 * sizeof (struct sockaddr_in6);
5012 	}
5013 
5014 	msgmp = allocb(allocsize, BPRI_HI);
5015 	if (msgmp == NULL) {
5016 		freeb(pfkeymp);
5017 		mutex_exit(&ipss->ipsec_alg_lock);
5018 		return (NULL);
5019 	}
5020 
5021 	pfkeymp->b_cont = msgmp;
5022 	cur = msgmp->b_rptr;
5023 	end = cur + allocsize;
5024 	samsg = (sadb_msg_t *)cur;
5025 	cur += sizeof (sadb_msg_t);
5026 
5027 	af = acqrec->ipsacq_addrfam;
5028 	switch (af) {
5029 	case AF_INET:
5030 		check_proto = IPPROTO_ICMP;
5031 		break;
5032 	case AF_INET6:
5033 		check_proto = IPPROTO_ICMPV6;
5034 		break;
5035 	default:
5036 		/* This should never happen unless we have kernel bugs. */
5037 		cmn_err(CE_WARN,
5038 		    "sadb_setup_acquire:  corrupt ACQUIRE record.\n");
5039 		ASSERT(0);
5040 		mutex_exit(&ipss->ipsec_alg_lock);
5041 		return (NULL);
5042 	}
5043 
5044 	samsg->sadb_msg_version = PF_KEY_V2;
5045 	samsg->sadb_msg_type = SADB_ACQUIRE;
5046 	samsg->sadb_msg_satype = satype;
5047 	samsg->sadb_msg_errno = 0;
5048 	samsg->sadb_msg_pid = 0;
5049 	samsg->sadb_msg_reserved = 0;
5050 	samsg->sadb_msg_seq = acqrec->ipsacq_seq;
5051 
5052 	ASSERT(MUTEX_HELD(&acqrec->ipsacq_lock));
5053 
5054 	if ((acqrec->ipsacq_proto == check_proto) || tunnel_mode) {
5055 		sport_typecode = dport_typecode = 0;
5056 	} else {
5057 		sport_typecode = acqrec->ipsacq_srcport;
5058 		dport_typecode = acqrec->ipsacq_dstport;
5059 	}
5060 
5061 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_SRC, af,
5062 	    acqrec->ipsacq_srcaddr, sport_typecode, acqrec->ipsacq_proto, 0);
5063 
5064 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_DST, af,
5065 	    acqrec->ipsacq_dstaddr, dport_typecode, acqrec->ipsacq_proto, 0);
5066 
5067 	if (tunnel_mode) {
5068 		sport_typecode = acqrec->ipsacq_srcport;
5069 		dport_typecode = acqrec->ipsacq_dstport;
5070 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_SRC,
5071 		    acqrec->ipsacq_inneraddrfam, acqrec->ipsacq_innersrc,
5072 		    sport_typecode, acqrec->ipsacq_inner_proto,
5073 		    acqrec->ipsacq_innersrcpfx);
5074 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_DST,
5075 		    acqrec->ipsacq_inneraddrfam, acqrec->ipsacq_innerdst,
5076 		    dport_typecode, acqrec->ipsacq_inner_proto,
5077 		    acqrec->ipsacq_innerdstpfx);
5078 	}
5079 
5080 	/* XXX Insert identity information here. */
5081 
5082 	/* XXXMLS Insert sensitivity information here. */
5083 
5084 	if (cur != NULL)
5085 		samsg->sadb_msg_len = SADB_8TO64(cur - msgmp->b_rptr);
5086 	else
5087 		mutex_exit(&ipss->ipsec_alg_lock);
5088 
5089 	return (pfkeymp);
5090 }
5091 
5092 /*
5093  * Given an SADB_GETSPI message, find an appropriately ranged SA and
5094  * allocate an SA.  If there are message improprieties, return (ipsa_t *)-1.
5095  * If there was a memory allocation error, return NULL.	 (Assume NULL !=
5096  * (ipsa_t *)-1).
5097  *
5098  * master_spi is passed in host order.
5099  */
5100 ipsa_t *
5101 sadb_getspi(keysock_in_t *ksi, uint32_t master_spi, int *diagnostic,
5102     netstack_t *ns)
5103 {
5104 	sadb_address_t *src =
5105 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC],
5106 	    *dst = (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
5107 	sadb_spirange_t *range =
5108 	    (sadb_spirange_t *)ksi->ks_in_extv[SADB_EXT_SPIRANGE];
5109 	struct sockaddr_in *ssa, *dsa;
5110 	struct sockaddr_in6 *ssa6, *dsa6;
5111 	uint32_t *srcaddr, *dstaddr;
5112 	sa_family_t af;
5113 	uint32_t add, min, max;
5114 
5115 	if (src == NULL) {
5116 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
5117 		return ((ipsa_t *)-1);
5118 	}
5119 	if (dst == NULL) {
5120 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
5121 		return ((ipsa_t *)-1);
5122 	}
5123 	if (range == NULL) {
5124 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_RANGE;
5125 		return ((ipsa_t *)-1);
5126 	}
5127 
5128 	min = ntohl(range->sadb_spirange_min);
5129 	max = ntohl(range->sadb_spirange_max);
5130 	dsa = (struct sockaddr_in *)(dst + 1);
5131 	dsa6 = (struct sockaddr_in6 *)dsa;
5132 
5133 	ssa = (struct sockaddr_in *)(src + 1);
5134 	ssa6 = (struct sockaddr_in6 *)ssa;
5135 	ASSERT(dsa->sin_family == ssa->sin_family);
5136 
5137 	srcaddr = ALL_ZEROES_PTR;
5138 	af = dsa->sin_family;
5139 	switch (af) {
5140 	case AF_INET:
5141 		if (src != NULL)
5142 			srcaddr = (uint32_t *)(&ssa->sin_addr);
5143 		dstaddr = (uint32_t *)(&dsa->sin_addr);
5144 		break;
5145 	case AF_INET6:
5146 		if (src != NULL)
5147 			srcaddr = (uint32_t *)(&ssa6->sin6_addr);
5148 		dstaddr = (uint32_t *)(&dsa6->sin6_addr);
5149 		break;
5150 	default:
5151 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_DST_AF;
5152 		return ((ipsa_t *)-1);
5153 	}
5154 
5155 	if (master_spi < min || master_spi > max) {
5156 		/* Return a random value in the range. */
5157 		(void) random_get_pseudo_bytes((uint8_t *)&add, sizeof (add));
5158 		master_spi = min + (add % (max - min + 1));
5159 	}
5160 
5161 	/*
5162 	 * Since master_spi is passed in host order, we need to htonl() it
5163 	 * for the purposes of creating a new SA.
5164 	 */
5165 	return (sadb_makelarvalassoc(htonl(master_spi), srcaddr, dstaddr, af,
5166 	    ns));
5167 }
5168 
5169 /*
5170  *
5171  * Locate an ACQUIRE and nuke it.  If I have an samsg that's larger than the
5172  * base header, just ignore it.	 Otherwise, lock down the whole ACQUIRE list
5173  * and scan for the sequence number in question.  I may wish to accept an
5174  * address pair with it, for easier searching.
5175  *
5176  * Caller frees the message, so we don't have to here.
5177  *
5178  * NOTE:	The ip_q parameter may be used in the future for ACQUIRE
5179  *		failures.
5180  */
5181 /* ARGSUSED */
5182 void
5183 sadb_in_acquire(sadb_msg_t *samsg, sadbp_t *sp, queue_t *ip_q, netstack_t *ns)
5184 {
5185 	int i;
5186 	ipsacq_t *acqrec;
5187 	iacqf_t *bucket;
5188 
5189 	/*
5190 	 * I only accept the base header for this!
5191 	 * Though to be honest, requiring the dst address would help
5192 	 * immensely.
5193 	 *
5194 	 * XXX	There are already cases where I can get the dst address.
5195 	 */
5196 	if (samsg->sadb_msg_len > SADB_8TO64(sizeof (*samsg)))
5197 		return;
5198 
5199 	/*
5200 	 * Using the samsg->sadb_msg_seq, find the ACQUIRE record, delete it,
5201 	 * (and in the future send a message to IP with the appropriate error
5202 	 * number).
5203 	 *
5204 	 * Q: Do I want to reject if pid != 0?
5205 	 */
5206 
5207 	for (i = 0; i < sp->s_v4.sdb_hashsize; i++) {
5208 		bucket = &sp->s_v4.sdb_acq[i];
5209 		mutex_enter(&bucket->iacqf_lock);
5210 		for (acqrec = bucket->iacqf_ipsacq; acqrec != NULL;
5211 		    acqrec = acqrec->ipsacq_next) {
5212 			if (samsg->sadb_msg_seq == acqrec->ipsacq_seq)
5213 				break;	/* for acqrec... loop. */
5214 		}
5215 		if (acqrec != NULL)
5216 			break;	/* for i = 0... loop. */
5217 
5218 		mutex_exit(&bucket->iacqf_lock);
5219 	}
5220 
5221 	if (acqrec == NULL) {
5222 		for (i = 0; i < sp->s_v6.sdb_hashsize; i++) {
5223 			bucket = &sp->s_v6.sdb_acq[i];
5224 			mutex_enter(&bucket->iacqf_lock);
5225 			for (acqrec = bucket->iacqf_ipsacq; acqrec != NULL;
5226 			    acqrec = acqrec->ipsacq_next) {
5227 				if (samsg->sadb_msg_seq == acqrec->ipsacq_seq)
5228 					break;	/* for acqrec... loop. */
5229 			}
5230 			if (acqrec != NULL)
5231 				break;	/* for i = 0... loop. */
5232 
5233 			mutex_exit(&bucket->iacqf_lock);
5234 		}
5235 	}
5236 
5237 
5238 	if (acqrec == NULL)
5239 		return;
5240 
5241 	/*
5242 	 * What do I do with the errno and IP?	I may need mp's services a
5243 	 * little more.	 See sadb_destroy_acquire() for future directions
5244 	 * beyond free the mblk chain on the acquire record.
5245 	 */
5246 
5247 	ASSERT(&bucket->iacqf_lock == acqrec->ipsacq_linklock);
5248 	sadb_destroy_acquire(acqrec, ns);
5249 	/* Have to exit mutex here, because of breaking out of for loop. */
5250 	mutex_exit(&bucket->iacqf_lock);
5251 }
5252 
5253 /*
5254  * The following functions work with the replay windows of an SA.  They assume
5255  * the ipsa->ipsa_replay_arr is an array of uint64_t, and that the bit vector
5256  * represents the highest sequence number packet received, and back
5257  * (ipsa->ipsa_replay_wsize) packets.
5258  */
5259 
5260 /*
5261  * Is the replay bit set?
5262  */
5263 static boolean_t
5264 ipsa_is_replay_set(ipsa_t *ipsa, uint32_t offset)
5265 {
5266 	uint64_t bit = (uint64_t)1 << (uint64_t)(offset & 63);
5267 
5268 	return ((bit & ipsa->ipsa_replay_arr[offset >> 6]) ? B_TRUE : B_FALSE);
5269 }
5270 
5271 /*
5272  * Shift the bits of the replay window over.
5273  */
5274 static void
5275 ipsa_shift_replay(ipsa_t *ipsa, uint32_t shift)
5276 {
5277 	int i;
5278 	int jump = ((shift - 1) >> 6) + 1;
5279 
5280 	if (shift == 0)
5281 		return;
5282 
5283 	for (i = (ipsa->ipsa_replay_wsize - 1) >> 6; i >= 0; i--) {
5284 		if (i + jump <= (ipsa->ipsa_replay_wsize - 1) >> 6) {
5285 			ipsa->ipsa_replay_arr[i + jump] |=
5286 			    ipsa->ipsa_replay_arr[i] >> (64 - (shift & 63));
5287 		}
5288 		ipsa->ipsa_replay_arr[i] <<= shift;
5289 	}
5290 }
5291 
5292 /*
5293  * Set a bit in the bit vector.
5294  */
5295 static void
5296 ipsa_set_replay(ipsa_t *ipsa, uint32_t offset)
5297 {
5298 	uint64_t bit = (uint64_t)1 << (uint64_t)(offset & 63);
5299 
5300 	ipsa->ipsa_replay_arr[offset >> 6] |= bit;
5301 }
5302 
/*
 * Largest 32-bit sequence number.  Once an SA's ipsa_replay reaches this
 * value, sadb_replay_peek() refuses further packets on that SA.
 */
#define	SADB_MAX_REPLAY_VALUE 0xffffffff
5304 
5305 /*
5306  * Assume caller has NOT done ntohl() already on seq.  Check to see
5307  * if replay sequence number "seq" has been seen already.
5308  */
5309 boolean_t
5310 sadb_replay_check(ipsa_t *ipsa, uint32_t seq)
5311 {
5312 	boolean_t rc;
5313 	uint32_t diff;
5314 
5315 	if (ipsa->ipsa_replay_wsize == 0)
5316 		return (B_TRUE);
5317 
5318 	/*
5319 	 * NOTE:  I've already checked for 0 on the wire in sadb_replay_peek().
5320 	 */
5321 
5322 	/* Convert sequence number into host order before holding the mutex. */
5323 	seq = ntohl(seq);
5324 
5325 	mutex_enter(&ipsa->ipsa_lock);
5326 
5327 	/* Initialize inbound SA's ipsa_replay field to last one received. */
5328 	if (ipsa->ipsa_replay == 0)
5329 		ipsa->ipsa_replay = 1;
5330 
5331 	if (seq > ipsa->ipsa_replay) {
5332 		/*
5333 		 * I have received a new "highest value received".  Shift
5334 		 * the replay window over.
5335 		 */
5336 		diff = seq - ipsa->ipsa_replay;
5337 		if (diff < ipsa->ipsa_replay_wsize) {
5338 			/* In replay window, shift bits over. */
5339 			ipsa_shift_replay(ipsa, diff);
5340 		} else {
5341 			/* WAY FAR AHEAD, clear bits and start again. */
5342 			bzero(ipsa->ipsa_replay_arr,
5343 			    sizeof (ipsa->ipsa_replay_arr));
5344 		}
5345 		ipsa_set_replay(ipsa, 0);
5346 		ipsa->ipsa_replay = seq;
5347 		rc = B_TRUE;
5348 		goto done;
5349 	}
5350 	diff = ipsa->ipsa_replay - seq;
5351 	if (diff >= ipsa->ipsa_replay_wsize || ipsa_is_replay_set(ipsa, diff)) {
5352 		rc = B_FALSE;
5353 		goto done;
5354 	}
5355 	/* Set this packet as seen. */
5356 	ipsa_set_replay(ipsa, diff);
5357 
5358 	rc = B_TRUE;
5359 done:
5360 	mutex_exit(&ipsa->ipsa_lock);
5361 	return (rc);
5362 }
5363 
5364 /*
5365  * "Peek" and see if we should even bother going through the effort of
5366  * running an authentication check on the sequence number passed in.
5367  * this takes into account packets that are below the replay window,
5368  * and collisions with already replayed packets.  Return B_TRUE if it
5369  * is okay to proceed, B_FALSE if this packet should be dropped immediately.
5370  * Assume same byte-ordering as sadb_replay_check.
5371  */
5372 boolean_t
5373 sadb_replay_peek(ipsa_t *ipsa, uint32_t seq)
5374 {
5375 	boolean_t rc = B_FALSE;
5376 	uint32_t diff;
5377 
5378 	if (ipsa->ipsa_replay_wsize == 0)
5379 		return (B_TRUE);
5380 
5381 	/*
5382 	 * 0 is 0, regardless of byte order... :)
5383 	 *
5384 	 * If I get 0 on the wire (and there is a replay window) then the
5385 	 * sender most likely wrapped.	This ipsa may need to be marked or
5386 	 * something.
5387 	 */
5388 	if (seq == 0)
5389 		return (B_FALSE);
5390 
5391 	seq = ntohl(seq);
5392 	mutex_enter(&ipsa->ipsa_lock);
5393 	if (seq < ipsa->ipsa_replay - ipsa->ipsa_replay_wsize &&
5394 	    ipsa->ipsa_replay >= ipsa->ipsa_replay_wsize)
5395 		goto done;
5396 
5397 	/*
5398 	 * If I've hit 0xffffffff, then quite honestly, I don't need to
5399 	 * bother with formalities.  I'm not accepting any more packets
5400 	 * on this SA.
5401 	 */
5402 	if (ipsa->ipsa_replay == SADB_MAX_REPLAY_VALUE) {
5403 		/*
5404 		 * Since we're already holding the lock, update the
5405 		 * expire time ala. sadb_replay_delete() and return.
5406 		 */
5407 		ipsa->ipsa_hardexpiretime = (time_t)1;
5408 		goto done;
5409 	}
5410 
5411 	if (seq <= ipsa->ipsa_replay) {
5412 		/*
5413 		 * This seq is in the replay window.  I'm not below it,
5414 		 * because I already checked for that above!
5415 		 */
5416 		diff = ipsa->ipsa_replay - seq;
5417 		if (ipsa_is_replay_set(ipsa, diff))
5418 			goto done;
5419 	}
5420 	/* Else return B_TRUE, I'm going to advance the window. */
5421 
5422 	rc = B_TRUE;
5423 done:
5424 	mutex_exit(&ipsa->ipsa_lock);
5425 	return (rc);
5426 }
5427 
5428 /*
5429  * Delete a single SA.
5430  *
5431  * For now, use the quick-and-dirty trick of making the association's
5432  * hard-expire lifetime (time_t)1, ensuring deletion by the *_ager().
5433  */
5434 void
5435 sadb_replay_delete(ipsa_t *assoc)
5436 {
5437 	mutex_enter(&assoc->ipsa_lock);
5438 	assoc->ipsa_hardexpiretime = (time_t)1;
5439 	mutex_exit(&assoc->ipsa_lock);
5440 }
5441 
5442 /*
5443  * Given a queue that presumably points to IP, send a T_BIND_REQ for _proto_
5444  * down.  The caller will handle the T_BIND_ACK locally.
5445  */
5446 boolean_t
5447 sadb_t_bind_req(queue_t *q, int proto)
5448 {
5449 	struct T_bind_req *tbr;
5450 	mblk_t *mp;
5451 
5452 	mp = allocb(sizeof (struct T_bind_req) + 1, BPRI_HI);
5453 	if (mp == NULL) {
5454 		/* cmn_err(CE_WARN, */
5455 		/* "sadb_t_bind_req(%d): couldn't allocate mblk\n", proto); */
5456 		return (B_FALSE);
5457 	}
5458 	mp->b_datap->db_type = M_PCPROTO;
5459 	tbr = (struct T_bind_req *)mp->b_rptr;
5460 	mp->b_wptr += sizeof (struct T_bind_req);
5461 	tbr->PRIM_type = T_BIND_REQ;
5462 	tbr->ADDR_length = 0;
5463 	tbr->ADDR_offset = 0;
5464 	tbr->CONIND_number = 0;
5465 	*mp->b_wptr = (uint8_t)proto;
5466 	mp->b_wptr++;
5467 
5468 	putnext(q, mp);
5469 	return (B_TRUE);
5470 }
5471 
5472 /*
5473  * Special front-end to ipsec_rl_strlog() dealing with SA failure.
5474  * this is designed to take only a format string with "* %x * %s *", so
5475  * that "spi" is printed first, then "addr" is converted using inet_pton().
5476  *
5477  * This is abstracted out to save the stack space for only when inet_pton()
5478  * is called.  Make sure "spi" is in network order; it usually is when this
5479  * would get called.
5480  */
5481 void
5482 ipsec_assocfailure(short mid, short sid, char level, ushort_t sl, char *fmt,
5483     uint32_t spi, void *addr, int af, netstack_t *ns)
5484 {
5485 	char buf[INET6_ADDRSTRLEN];
5486 
5487 	ASSERT(af == AF_INET6 || af == AF_INET);
5488 
5489 	ipsec_rl_strlog(ns, mid, sid, level, sl, fmt, ntohl(spi),
5490 	    inet_ntop(af, addr, buf, sizeof (buf)));
5491 }
5492 
5493 /*
5494  * Fills in a reference to the policy, if any, from the conn, in *ppp
5495  * Releases a reference to the passed conn_t.
5496  */
5497 static void
5498 ipsec_conn_pol(ipsec_selector_t *sel, conn_t *connp, ipsec_policy_t **ppp)
5499 {
5500 	ipsec_policy_t	*pp;
5501 	ipsec_latch_t	*ipl = connp->conn_latch;
5502 
5503 	if ((ipl != NULL) && (ipl->ipl_out_policy != NULL)) {
5504 		pp = ipl->ipl_out_policy;
5505 		IPPOL_REFHOLD(pp);
5506 	} else {
5507 		pp = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, NULL, sel,
5508 		    connp->conn_netstack);
5509 	}
5510 	*ppp = pp;
5511 	CONN_DEC_REF(connp);
5512 }
5513 
5514 /*
5515  * The following functions scan through active conn_t structures
5516  * and return a reference to the best-matching policy it can find.
5517  * Caller must release the reference.
5518  */
5519 static void
5520 ipsec_udp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp, ip_stack_t *ipst)
5521 {
5522 	connf_t *connfp;
5523 	conn_t *connp = NULL;
5524 	ipsec_selector_t portonly;
5525 
5526 	bzero((void*)&portonly, sizeof (portonly));
5527 
5528 	if (sel->ips_local_port == 0)
5529 		return;
5530 
5531 	connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(sel->ips_local_port,
5532 	    ipst)];
5533 	mutex_enter(&connfp->connf_lock);
5534 
5535 	if (sel->ips_isv4) {
5536 		connp = connfp->connf_head;
5537 		while (connp != NULL) {
5538 			if (IPCL_UDP_MATCH(connp, sel->ips_local_port,
5539 			    sel->ips_local_addr_v4, sel->ips_remote_port,
5540 			    sel->ips_remote_addr_v4))
5541 				break;
5542 			connp = connp->conn_next;
5543 		}
5544 
5545 		if (connp == NULL) {
5546 			/* Try port-only match in IPv6. */
5547 			portonly.ips_local_port = sel->ips_local_port;
5548 			sel = &portonly;
5549 		}
5550 	}
5551 
5552 	if (connp == NULL) {
5553 		connp = connfp->connf_head;
5554 		while (connp != NULL) {
5555 			if (IPCL_UDP_MATCH_V6(connp, sel->ips_local_port,
5556 			    sel->ips_local_addr_v6, sel->ips_remote_port,
5557 			    sel->ips_remote_addr_v6))
5558 				break;
5559 			connp = connp->conn_next;
5560 		}
5561 
5562 		if (connp == NULL) {
5563 			mutex_exit(&connfp->connf_lock);
5564 			return;
5565 		}
5566 	}
5567 
5568 	CONN_INC_REF(connp);
5569 	mutex_exit(&connfp->connf_lock);
5570 
5571 	ipsec_conn_pol(sel, connp, ppp);
5572 }
5573 
5574 static conn_t *
5575 ipsec_find_listen_conn(uint16_t *pptr, ipsec_selector_t *sel, ip_stack_t *ipst)
5576 {
5577 	connf_t *connfp;
5578 	conn_t *connp = NULL;
5579 	const in6_addr_t *v6addrmatch = &sel->ips_local_addr_v6;
5580 
5581 	if (sel->ips_local_port == 0)
5582 		return (NULL);
5583 
5584 	connfp = &ipst->ips_ipcl_bind_fanout[
5585 	    IPCL_BIND_HASH(sel->ips_local_port, ipst)];
5586 	mutex_enter(&connfp->connf_lock);
5587 
5588 	if (sel->ips_isv4) {
5589 		connp = connfp->connf_head;
5590 		while (connp != NULL) {
5591 			if (IPCL_BIND_MATCH(connp, IPPROTO_TCP,
5592 			    sel->ips_local_addr_v4, pptr[1]))
5593 				break;
5594 			connp = connp->conn_next;
5595 		}
5596 
5597 		if (connp == NULL) {
5598 			/* Match to all-zeroes. */
5599 			v6addrmatch = &ipv6_all_zeros;
5600 		}
5601 	}
5602 
5603 	if (connp == NULL) {
5604 		connp = connfp->connf_head;
5605 		while (connp != NULL) {
5606 			if (IPCL_BIND_MATCH_V6(connp, IPPROTO_TCP,
5607 			    *v6addrmatch, pptr[1]))
5608 				break;
5609 			connp = connp->conn_next;
5610 		}
5611 
5612 		if (connp == NULL) {
5613 			mutex_exit(&connfp->connf_lock);
5614 			return (NULL);
5615 		}
5616 	}
5617 
5618 	CONN_INC_REF(connp);
5619 	mutex_exit(&connfp->connf_lock);
5620 	return (connp);
5621 }
5622 
5623 static void
5624 ipsec_tcp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp, ip_stack_t *ipst)
5625 {
5626 	connf_t 	*connfp;
5627 	conn_t		*connp;
5628 	uint32_t	ports;
5629 	uint16_t	*pptr = (uint16_t *)&ports;
5630 
5631 	/*
5632 	 * Find TCP state in the following order:
5633 	 * 1.) Connected conns.
5634 	 * 2.) Listeners.
5635 	 *
5636 	 * Even though #2 will be the common case for inbound traffic, only
5637 	 * following this order insures correctness.
5638 	 */
5639 
5640 	if (sel->ips_local_port == 0)
5641 		return;
5642 
5643 	/*
5644 	 * 0 should be fport, 1 should be lport.  SRC is the local one here.
5645 	 * See ipsec_construct_inverse_acquire() for details.
5646 	 */
5647 	pptr[0] = sel->ips_remote_port;
5648 	pptr[1] = sel->ips_local_port;
5649 
5650 	connfp = &ipst->ips_ipcl_conn_fanout[
5651 	    IPCL_CONN_HASH(sel->ips_remote_addr_v4, ports, ipst)];
5652 	mutex_enter(&connfp->connf_lock);
5653 	connp = connfp->connf_head;
5654 
5655 	if (sel->ips_isv4) {
5656 		while (connp != NULL) {
5657 			if (IPCL_CONN_MATCH(connp, IPPROTO_TCP,
5658 			    sel->ips_remote_addr_v4, sel->ips_local_addr_v4,
5659 			    ports))
5660 				break;
5661 			connp = connp->conn_next;
5662 		}
5663 	} else {
5664 		while (connp != NULL) {
5665 			if (IPCL_CONN_MATCH_V6(connp, IPPROTO_TCP,
5666 			    sel->ips_remote_addr_v6, sel->ips_local_addr_v6,
5667 			    ports))
5668 				break;
5669 			connp = connp->conn_next;
5670 		}
5671 	}
5672 
5673 	if (connp != NULL) {
5674 		CONN_INC_REF(connp);
5675 		mutex_exit(&connfp->connf_lock);
5676 	} else {
5677 		mutex_exit(&connfp->connf_lock);
5678 
5679 		/* Try the listen hash. */
5680 		if ((connp = ipsec_find_listen_conn(pptr, sel, ipst)) == NULL)
5681 			return;
5682 	}
5683 
5684 	ipsec_conn_pol(sel, connp, ppp);
5685 }
5686 
5687 static void
5688 ipsec_sctp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp,
5689     ip_stack_t *ipst)
5690 {
5691 	conn_t		*connp;
5692 	uint32_t	ports;
5693 	uint16_t	*pptr = (uint16_t *)&ports;
5694 
5695 	/*
5696 	 * Find SCP state in the following order:
5697 	 * 1.) Connected conns.
5698 	 * 2.) Listeners.
5699 	 *
5700 	 * Even though #2 will be the common case for inbound traffic, only
5701 	 * following this order insures correctness.
5702 	 */
5703 
5704 	if (sel->ips_local_port == 0)
5705 		return;
5706 
5707 	/*
5708 	 * 0 should be fport, 1 should be lport.  SRC is the local one here.
5709 	 * See ipsec_construct_inverse_acquire() for details.
5710 	 */
5711 	pptr[0] = sel->ips_remote_port;
5712 	pptr[1] = sel->ips_local_port;
5713 
5714 	if (sel->ips_isv4) {
5715 		in6_addr_t	src, dst;
5716 
5717 		IN6_IPADDR_TO_V4MAPPED(sel->ips_remote_addr_v4, &dst);
5718 		IN6_IPADDR_TO_V4MAPPED(sel->ips_local_addr_v4, &src);
5719 		connp = sctp_find_conn(&dst, &src, ports, ALL_ZONES,
5720 		    ipst->ips_netstack->netstack_sctp);
5721 	} else {
5722 		connp = sctp_find_conn(&sel->ips_remote_addr_v6,
5723 		    &sel->ips_local_addr_v6, ports, ALL_ZONES,
5724 		    ipst->ips_netstack->netstack_sctp);
5725 	}
5726 	if (connp == NULL)
5727 		return;
5728 	ipsec_conn_pol(sel, connp, ppp);
5729 }
5730 
5731 /*
5732  * Fill in a query for the SPD (in "sel") using two PF_KEY address extensions.
5733  * Returns 0 or errno, and always sets *diagnostic to something appropriate
5734  * to PF_KEY.
5735  *
5736  * NOTE:  For right now, this function (and ipsec_selector_t for that matter),
5737  * ignore prefix lengths in the address extension.  Since we match on first-
5738  * entered policies, this shouldn't matter.  Also, since we normalize prefix-
5739  * set addresses to mask out the lower bits, we should get a suitable search
5740  * key for the SPD anyway.  This is the function to change if the assumption
5741  * about suitable search keys is wrong.
5742  */
5743 static int
5744 ipsec_get_inverse_acquire_sel(ipsec_selector_t *sel, sadb_address_t *srcext,
5745     sadb_address_t *dstext, int *diagnostic)
5746 {
5747 	struct sockaddr_in *src, *dst;
5748 	struct sockaddr_in6 *src6, *dst6;
5749 
5750 	*diagnostic = 0;
5751 
5752 	bzero(sel, sizeof (*sel));
5753 	sel->ips_protocol = srcext->sadb_address_proto;
5754 	dst = (struct sockaddr_in *)(dstext + 1);
5755 	if (dst->sin_family == AF_INET6) {
5756 		dst6 = (struct sockaddr_in6 *)dst;
5757 		src6 = (struct sockaddr_in6 *)(srcext + 1);
5758 		if (src6->sin6_family != AF_INET6) {
5759 			*diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
5760 			return (EINVAL);
5761 		}
5762 		sel->ips_remote_addr_v6 = dst6->sin6_addr;
5763 		sel->ips_local_addr_v6 = src6->sin6_addr;
5764 		if (sel->ips_protocol == IPPROTO_ICMPV6) {
5765 			sel->ips_is_icmp_inv_acq = 1;
5766 		} else {
5767 			sel->ips_remote_port = dst6->sin6_port;
5768 			sel->ips_local_port = src6->sin6_port;
5769 		}
5770 		sel->ips_isv4 = B_FALSE;
5771 	} else {
5772 		src = (struct sockaddr_in *)(srcext + 1);
5773 		if (src->sin_family != AF_INET) {
5774 			*diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
5775 			return (EINVAL);
5776 		}
5777 		sel->ips_remote_addr_v4 = dst->sin_addr.s_addr;
5778 		sel->ips_local_addr_v4 = src->sin_addr.s_addr;
5779 		if (sel->ips_protocol == IPPROTO_ICMP) {
5780 			sel->ips_is_icmp_inv_acq = 1;
5781 		} else {
5782 			sel->ips_remote_port = dst->sin_port;
5783 			sel->ips_local_port = src->sin_port;
5784 		}
5785 		sel->ips_isv4 = B_TRUE;
5786 	}
5787 	return (0);
5788 }
5789 
5790 /*
5791  * We have encapsulation.
5792  * - Lookup tun_t by address and look for an associated
5793  *   tunnel policy
5794  * - If there are inner selectors
5795  *   - check ITPF_P_TUNNEL and ITPF_P_ACTIVE
5796  *   - Look up tunnel policy based on selectors
5797  * - Else
5798  *   - Sanity check the negotation
5799  *   - If appropriate, fall through to global policy
5800  */
5801 static int
5802 ipsec_tun_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp,
5803     sadb_address_t *innsrcext, sadb_address_t *inndstext, ipsec_tun_pol_t *itp,
5804     int *diagnostic, netstack_t *ns)
5805 {
5806 	int err;
5807 	ipsec_policy_head_t *polhead;
5808 
5809 	/* Check for inner selectors and act appropriately */
5810 
5811 	if (innsrcext != NULL) {
5812 		/* Inner selectors present */
5813 		ASSERT(inndstext != NULL);
5814 		if ((itp == NULL) ||
5815 		    (itp->itp_flags & (ITPF_P_ACTIVE | ITPF_P_TUNNEL)) !=
5816 		    (ITPF_P_ACTIVE | ITPF_P_TUNNEL)) {
5817 			/*
5818 			 * If inner packet selectors, we must have negotiate
5819 			 * tunnel and active policy.  If the tunnel has
5820 			 * transport-mode policy set on it, or has no policy,
5821 			 * fail.
5822 			 */
5823 			return (ENOENT);
5824 		} else {
5825 			/*
5826 			 * Reset "sel" to indicate inner selectors.  Pass
5827 			 * inner PF_KEY address extensions for this to happen.
5828 			 */
5829 			err = ipsec_get_inverse_acquire_sel(sel,
5830 			    innsrcext, inndstext, diagnostic);
5831 			if (err != 0) {
5832 				ITP_REFRELE(itp, ns);
5833 				return (err);
5834 			}
5835 			/*
5836 			 * Now look for a tunnel policy based on those inner
5837 			 * selectors.  (Common code is below.)
5838 			 */
5839 		}
5840 	} else {
5841 		/* No inner selectors present */
5842 		if ((itp == NULL) || !(itp->itp_flags & ITPF_P_ACTIVE)) {
5843 			/*
5844 			 * Transport mode negotiation with no tunnel policy
5845 			 * configured - return to indicate a global policy
5846 			 * check is needed.
5847 			 */
5848 			if (itp != NULL) {
5849 				ITP_REFRELE(itp, ns);
5850 			}
5851 			return (0);
5852 		} else if (itp->itp_flags & ITPF_P_TUNNEL) {
5853 			/* Tunnel mode set with no inner selectors. */
5854 			ITP_REFRELE(itp, ns);
5855 			return (ENOENT);
5856 		}
5857 		/*
5858 		 * Else, this is a tunnel policy configured with ifconfig(1m)
5859 		 * or "negotiate transport" with ipsecconf(1m).  We have an
5860 		 * itp with policy set based on any match, so don't bother
5861 		 * changing fields in "sel".
5862 		 */
5863 	}
5864 
5865 	ASSERT(itp != NULL);
5866 	polhead = itp->itp_policy;
5867 	ASSERT(polhead != NULL);
5868 	rw_enter(&polhead->iph_lock, RW_READER);
5869 	*ppp = ipsec_find_policy_head(NULL, polhead,
5870 	    IPSEC_TYPE_INBOUND, sel, ns);
5871 	rw_exit(&polhead->iph_lock);
5872 	ITP_REFRELE(itp, ns);
5873 
5874 	/*
5875 	 * Don't default to global if we didn't find a matching policy entry.
5876 	 * Instead, send ENOENT, just like if we hit a transport-mode tunnel.
5877 	 */
5878 	if (*ppp == NULL)
5879 		return (ENOENT);
5880 
5881 	return (0);
5882 }
5883 
5884 static void
5885 ipsec_oth_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp,
5886     ip_stack_t *ipst)
5887 {
5888 	boolean_t	isv4 = sel->ips_isv4;
5889 	connf_t		*connfp;
5890 	conn_t		*connp;
5891 
5892 	if (isv4) {
5893 		connfp = &ipst->ips_ipcl_proto_fanout[sel->ips_protocol];
5894 	} else {
5895 		connfp = &ipst->ips_ipcl_proto_fanout_v6[sel->ips_protocol];
5896 	}
5897 
5898 	mutex_enter(&connfp->connf_lock);
5899 	for (connp = connfp->connf_head; connp != NULL;
5900 	    connp = connp->conn_next) {
5901 		if (!((isv4 && !((connp->conn_src == 0 ||
5902 		    connp->conn_src == sel->ips_local_addr_v4) &&
5903 		    (connp->conn_rem == 0 ||
5904 		    connp->conn_rem == sel->ips_remote_addr_v4))) ||
5905 		    (!isv4 && !((IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) ||
5906 		    IN6_ARE_ADDR_EQUAL(&connp->conn_srcv6,
5907 		    &sel->ips_local_addr_v6)) &&
5908 		    (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6) ||
5909 		    IN6_ARE_ADDR_EQUAL(&connp->conn_remv6,
5910 		    &sel->ips_remote_addr_v6)))))) {
5911 			break;
5912 		}
5913 	}
5914 	if (connp == NULL) {
5915 		mutex_exit(&connfp->connf_lock);
5916 		return;
5917 	}
5918 
5919 	CONN_INC_REF(connp);
5920 	mutex_exit(&connfp->connf_lock);
5921 
5922 	ipsec_conn_pol(sel, connp, ppp);
5923 }
5924 
5925 /*
5926  * Construct an inverse ACQUIRE reply based on:
5927  *
5928  * 1.) Current global policy.
5929  * 2.) An conn_t match depending on what all was passed in the extv[].
5930  * 3.) A tunnel's policy head.
5931  * ...
5932  * N.) Other stuff TBD (e.g. identities)
5933  *
5934  * If there is an error, set sadb_msg_errno and sadb_x_msg_diagnostic
5935  * in this function so the caller can extract them where appropriately.
5936  *
5937  * The SRC address is the local one - just like an outbound ACQUIRE message.
5938  */
5939 mblk_t *
5940 ipsec_construct_inverse_acquire(sadb_msg_t *samsg, sadb_ext_t *extv[],
5941     netstack_t *ns)
5942 {
5943 	int err;
5944 	int diagnostic;
5945 	sadb_address_t *srcext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_SRC],
5946 	    *dstext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_DST],
5947 	    *innsrcext = (sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_SRC],
5948 	    *inndstext = (sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_DST];
5949 	struct sockaddr_in6 *src, *dst;
5950 	struct sockaddr_in6 *isrc, *idst;
5951 	ipsec_tun_pol_t *itp = NULL;
5952 	ipsec_policy_t *pp = NULL;
5953 	ipsec_selector_t sel, isel;
5954 	mblk_t *retmp;
5955 	ip_stack_t	*ipst = ns->netstack_ip;
5956 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
5957 
5958 	/* Normalize addresses */
5959 	if (sadb_addrcheck(NULL, (mblk_t *)samsg, (sadb_ext_t *)srcext, 0, ns)
5960 	    == KS_IN_ADDR_UNKNOWN) {
5961 		err = EINVAL;
5962 		diagnostic = SADB_X_DIAGNOSTIC_BAD_SRC;
5963 		goto bail;
5964 	}
5965 	src = (struct sockaddr_in6 *)(srcext + 1);
5966 	if (sadb_addrcheck(NULL, (mblk_t *)samsg, (sadb_ext_t *)dstext, 0, ns)
5967 	    == KS_IN_ADDR_UNKNOWN) {
5968 		err = EINVAL;
5969 		diagnostic = SADB_X_DIAGNOSTIC_BAD_DST;
5970 		goto bail;
5971 	}
5972 	dst = (struct sockaddr_in6 *)(dstext + 1);
5973 	if (src->sin6_family != dst->sin6_family) {
5974 		err = EINVAL;
5975 		diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
5976 		goto bail;
5977 	}
5978 
5979 	/* Check for tunnel mode and act appropriately */
5980 	if (innsrcext != NULL) {
5981 		if (inndstext == NULL) {
5982 			err = EINVAL;
5983 			diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_DST;
5984 			goto bail;
5985 		}
5986 		if (sadb_addrcheck(NULL, (mblk_t *)samsg,
5987 		    (sadb_ext_t *)innsrcext, 0, ns) == KS_IN_ADDR_UNKNOWN) {
5988 			err = EINVAL;
5989 			diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_INNER_SRC;
5990 			goto bail;
5991 		}
5992 		isrc = (struct sockaddr_in6 *)(innsrcext + 1);
5993 		if (sadb_addrcheck(NULL, (mblk_t *)samsg,
5994 		    (sadb_ext_t *)inndstext, 0, ns) == KS_IN_ADDR_UNKNOWN) {
5995 			err = EINVAL;
5996 			diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_INNER_DST;
5997 			goto bail;
5998 		}
5999 		idst = (struct sockaddr_in6 *)(inndstext + 1);
6000 		if (isrc->sin6_family != idst->sin6_family) {
6001 			err = EINVAL;
6002 			diagnostic = SADB_X_DIAGNOSTIC_INNER_AF_MISMATCH;
6003 			goto bail;
6004 		}
6005 		if (isrc->sin6_family != AF_INET &&
6006 		    isrc->sin6_family != AF_INET6) {
6007 			err = EINVAL;
6008 			diagnostic = SADB_X_DIAGNOSTIC_BAD_INNER_SRC_AF;
6009 			goto bail;
6010 		}
6011 	} else if (inndstext != NULL) {
6012 		err = EINVAL;
6013 		diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_SRC;
6014 		goto bail;
6015 	}
6016 
6017 	/* Get selectors first, based on outer addresses */
6018 	err = ipsec_get_inverse_acquire_sel(&sel, srcext, dstext, &diagnostic);
6019 	if (err != 0)
6020 		goto bail;
6021 
6022 	/* Check for tunnel mode mismatches. */
6023 	if (innsrcext != NULL &&
6024 	    ((isrc->sin6_family == AF_INET &&
6025 	    sel.ips_protocol != IPPROTO_ENCAP && sel.ips_protocol != 0) ||
6026 	    (isrc->sin6_family == AF_INET6 &&
6027 	    sel.ips_protocol != IPPROTO_IPV6 && sel.ips_protocol != 0))) {
6028 		err = EPROTOTYPE;
6029 		goto bail;
6030 	}
6031 
6032 	/*
6033 	 * Okay, we have the addresses and other selector information.
6034 	 * Let's first find a conn...
6035 	 */
6036 	pp = NULL;
6037 	switch (sel.ips_protocol) {
6038 	case IPPROTO_TCP:
6039 		ipsec_tcp_pol(&sel, &pp, ipst);
6040 		break;
6041 	case IPPROTO_UDP:
6042 		ipsec_udp_pol(&sel, &pp, ipst);
6043 		break;
6044 	case IPPROTO_SCTP:
6045 		ipsec_sctp_pol(&sel, &pp, ipst);
6046 		break;
6047 	case IPPROTO_ENCAP:
6048 	case IPPROTO_IPV6:
6049 		rw_enter(&ipss->ipsec_itp_get_byaddr_rw_lock, RW_READER);
6050 		/*
6051 		 * Assume sel.ips_remote_addr_* has the right address at
6052 		 * that exact position.
6053 		 */
6054 		itp = ipss->ipsec_itp_get_byaddr(
6055 		    (uint32_t *)(&sel.ips_local_addr_v6),
6056 		    (uint32_t *)(&sel.ips_remote_addr_v6),
6057 		    src->sin6_family, ns);
6058 		rw_exit(&ipss->ipsec_itp_get_byaddr_rw_lock);
6059 		if (innsrcext == NULL) {
6060 			/*
6061 			 * Transport-mode tunnel, make sure we fake out isel
6062 			 * to contain something based on the outer protocol.
6063 			 */
6064 			bzero(&isel, sizeof (isel));
6065 			isel.ips_isv4 = (sel.ips_protocol == IPPROTO_ENCAP);
6066 		} /* Else isel is initialized by ipsec_tun_pol(). */
6067 		err = ipsec_tun_pol(&isel, &pp, innsrcext, inndstext, itp,
6068 		    &diagnostic, ns);
6069 		/*
6070 		 * NOTE:  isel isn't used for now, but in RFC 430x IPsec, it
6071 		 * may be.
6072 		 */
6073 		if (err != 0)
6074 			goto bail;
6075 		break;
6076 	default:
6077 		ipsec_oth_pol(&sel, &pp, ipst);
6078 		break;
6079 	}
6080 
6081 	/*
6082 	 * If we didn't find a matching conn_t or other policy head, take a
6083 	 * look in the global policy.
6084 	 */
6085 	if (pp == NULL) {
6086 		pp = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, NULL, NULL, &sel,
6087 		    ns);
6088 		if (pp == NULL) {
6089 			/* There's no global policy. */
6090 			err = ENOENT;
6091 			diagnostic = 0;
6092 			goto bail;
6093 		}
6094 	}
6095 
6096 	/*
6097 	 * Now that we have a policy entry/widget, construct an ACQUIRE
6098 	 * message based on that, fix fields where appropriate,
6099 	 * and return the message.
6100 	 */
6101 	retmp = sadb_extended_acquire(&sel, pp, NULL,
6102 	    (itp != NULL && (itp->itp_flags & ITPF_P_TUNNEL)),
6103 	    samsg->sadb_msg_seq, samsg->sadb_msg_pid, ns);
6104 	if (pp != NULL) {
6105 		IPPOL_REFRELE(pp, ns);
6106 	}
6107 	if (retmp != NULL) {
6108 		return (retmp);
6109 	} else {
6110 		err = ENOMEM;
6111 		diagnostic = 0;
6112 	}
6113 bail:
6114 	samsg->sadb_msg_errno = (uint8_t)err;
6115 	samsg->sadb_x_msg_diagnostic = (uint16_t)diagnostic;
6116 	return (NULL);
6117 }
6118 
6119 /*
6120  * ipsa_lpkt is a one-element queue, only manipulated by casptr within
6121  * the next two functions.
6122  *
6123  * These functions loop calling casptr() until the swap "happens",
6124  * turning a compare-and-swap op into an atomic swap operation.
6125  */
6126 
6127 /*
6128  * sadb_set_lpkt: Atomically swap in a value to ipsa->ipsa_lpkt and
6129  * freemsg the previous value.  free clue: freemsg(NULL) is safe.
6130  */
6131 
6132 void
6133 sadb_set_lpkt(ipsa_t *ipsa, mblk_t *npkt, netstack_t *ns)
6134 {
6135 	mblk_t *opkt;
6136 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
6137 
6138 	membar_producer();
6139 	do {
6140 		opkt = ipsa->ipsa_lpkt;
6141 	} while (casptr(&ipsa->ipsa_lpkt, opkt, npkt) != opkt);
6142 
6143 	ip_drop_packet(opkt, B_TRUE, NULL, NULL,
6144 	    DROPPER(ipss, ipds_sadb_inlarval_replace),
6145 	    &ipss->ipsec_sadb_dropper);
6146 }
6147 
6148 /*
6149  * sadb_clear_lpkt: Atomically clear ipsa->ipsa_lpkt and return the
6150  * previous value.
6151  */
6152 
6153 mblk_t *
6154 sadb_clear_lpkt(ipsa_t *ipsa)
6155 {
6156 	mblk_t *opkt;
6157 
6158 	do {
6159 		opkt = ipsa->ipsa_lpkt;
6160 	} while (casptr(&ipsa->ipsa_lpkt, opkt, NULL) != opkt);
6161 
6162 	return (opkt);
6163 }
6164 
6165 /*
6166  * Walker callback used by sadb_alg_update() to free/create crypto
6167  * context template when a crypto software provider is removed or
6168  * added.
6169  */
6170 
6171 struct sadb_update_alg_state {
6172 	ipsec_algtype_t alg_type;
6173 	uint8_t alg_id;
6174 	boolean_t is_added;
6175 };
6176 
6177 static void
6178 sadb_alg_update_cb(isaf_t *head, ipsa_t *entry, void *cookie)
6179 {
6180 	struct sadb_update_alg_state *update_state =
6181 	    (struct sadb_update_alg_state *)cookie;
6182 	crypto_ctx_template_t *ctx_tmpl = NULL;
6183 
6184 	ASSERT(MUTEX_HELD(&head->isaf_lock));
6185 
6186 	if (entry->ipsa_state == IPSA_STATE_LARVAL)
6187 		return;
6188 
6189 	mutex_enter(&entry->ipsa_lock);
6190 
6191 	switch (update_state->alg_type) {
6192 	case IPSEC_ALG_AUTH:
6193 		if (entry->ipsa_auth_alg == update_state->alg_id)
6194 			ctx_tmpl = &entry->ipsa_authtmpl;
6195 		break;
6196 	case IPSEC_ALG_ENCR:
6197 		if (entry->ipsa_encr_alg == update_state->alg_id)
6198 			ctx_tmpl = &entry->ipsa_encrtmpl;
6199 		break;
6200 	default:
6201 		ctx_tmpl = NULL;
6202 	}
6203 
6204 	if (ctx_tmpl == NULL) {
6205 		mutex_exit(&entry->ipsa_lock);
6206 		return;
6207 	}
6208 
6209 	/*
6210 	 * The context template of the SA may be affected by the change
6211 	 * of crypto provider.
6212 	 */
6213 	if (update_state->is_added) {
6214 		/* create the context template if not already done */
6215 		if (*ctx_tmpl == NULL) {
6216 			(void) ipsec_create_ctx_tmpl(entry,
6217 			    update_state->alg_type);
6218 		}
6219 	} else {
6220 		/*
6221 		 * The crypto provider was removed. If the context template
6222 		 * exists but it is no longer valid, free it.
6223 		 */
6224 		if (*ctx_tmpl != NULL)
6225 			ipsec_destroy_ctx_tmpl(entry, update_state->alg_type);
6226 	}
6227 
6228 	mutex_exit(&entry->ipsa_lock);
6229 }
6230 
6231 /*
6232  * Invoked by IP when an software crypto provider has been updated.
6233  * The type and id of the corresponding algorithm is passed as argument.
6234  * is_added is B_TRUE if the provider was added, B_FALSE if it was
6235  * removed. The function updates the SADB and free/creates the
6236  * context templates associated with SAs if needed.
6237  */
6238 
6239 #define	SADB_ALG_UPDATE_WALK(sadb, table) \
6240     sadb_walker((sadb).table, (sadb).sdb_hashsize, sadb_alg_update_cb, \
6241 	&update_state)
6242 
6243 void
6244 sadb_alg_update(ipsec_algtype_t alg_type, uint8_t alg_id, boolean_t is_added,
6245     netstack_t *ns)
6246 {
6247 	struct sadb_update_alg_state update_state;
6248 	ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
6249 	ipsecesp_stack_t	*espstack = ns->netstack_ipsecesp;
6250 
6251 	update_state.alg_type = alg_type;
6252 	update_state.alg_id = alg_id;
6253 	update_state.is_added = is_added;
6254 
6255 	if (alg_type == IPSEC_ALG_AUTH) {
6256 		/* walk the AH tables only for auth. algorithm changes */
6257 		SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v4, sdb_of);
6258 		SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v4, sdb_if);
6259 		SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v6, sdb_of);
6260 		SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v6, sdb_if);
6261 	}
6262 
6263 	/* walk the ESP tables */
6264 	SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v4, sdb_of);
6265 	SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v4, sdb_if);
6266 	SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v6, sdb_of);
6267 	SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v6, sdb_if);
6268 }
6269 
6270 /*
6271  * Creates a context template for the specified SA. This function
6272  * is called when an SA is created and when a context template needs
6273  * to be created due to a change of software provider.
6274  */
6275 int
6276 ipsec_create_ctx_tmpl(ipsa_t *sa, ipsec_algtype_t alg_type)
6277 {
6278 	ipsec_alginfo_t *alg;
6279 	crypto_mechanism_t mech;
6280 	crypto_key_t *key;
6281 	crypto_ctx_template_t *sa_tmpl;
6282 	int rv;
6283 	ipsec_stack_t	*ipss = sa->ipsa_netstack->netstack_ipsec;
6284 
6285 	ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock));
6286 	ASSERT(MUTEX_HELD(&sa->ipsa_lock));
6287 
6288 	/* get pointers to the algorithm info, context template, and key */
6289 	switch (alg_type) {
6290 	case IPSEC_ALG_AUTH:
6291 		key = &sa->ipsa_kcfauthkey;
6292 		sa_tmpl = &sa->ipsa_authtmpl;
6293 		alg = ipss->ipsec_alglists[alg_type][sa->ipsa_auth_alg];
6294 		break;
6295 	case IPSEC_ALG_ENCR:
6296 		key = &sa->ipsa_kcfencrkey;
6297 		sa_tmpl = &sa->ipsa_encrtmpl;
6298 		alg = ipss->ipsec_alglists[alg_type][sa->ipsa_encr_alg];
6299 		break;
6300 	default:
6301 		alg = NULL;
6302 	}
6303 
6304 	if (alg == NULL || !ALG_VALID(alg))
6305 		return (EINVAL);
6306 
6307 	/* initialize the mech info structure for the framework */
6308 	ASSERT(alg->alg_mech_type != CRYPTO_MECHANISM_INVALID);
6309 	mech.cm_type = alg->alg_mech_type;
6310 	mech.cm_param = NULL;
6311 	mech.cm_param_len = 0;
6312 
6313 	/* create a new context template */
6314 	rv = crypto_create_ctx_template(&mech, key, sa_tmpl, KM_NOSLEEP);
6315 
6316 	/*
6317 	 * CRYPTO_MECH_NOT_SUPPORTED can be returned if only hardware
6318 	 * providers are available for that mechanism. In that case
6319 	 * we don't fail, and will generate the context template from
6320 	 * the framework callback when a software provider for that
6321 	 * mechanism registers.
6322 	 *
6323 	 * The context template is assigned the special value
6324 	 * IPSEC_CTX_TMPL_ALLOC if the allocation failed due to a
6325 	 * lack of memory. No attempt will be made to use
6326 	 * the context template if it is set to this value.
6327 	 */
6328 	if (rv == CRYPTO_HOST_MEMORY) {
6329 		*sa_tmpl = IPSEC_CTX_TMPL_ALLOC;
6330 	} else if (rv != CRYPTO_SUCCESS) {
6331 		*sa_tmpl = NULL;
6332 		if (rv != CRYPTO_MECH_NOT_SUPPORTED)
6333 			return (EINVAL);
6334 	}
6335 
6336 	return (0);
6337 }
6338 
6339 /*
6340  * Destroy the context template of the specified algorithm type
6341  * of the specified SA. Must be called while holding the SA lock.
6342  */
6343 void
6344 ipsec_destroy_ctx_tmpl(ipsa_t *sa, ipsec_algtype_t alg_type)
6345 {
6346 	ASSERT(MUTEX_HELD(&sa->ipsa_lock));
6347 
6348 	if (alg_type == IPSEC_ALG_AUTH) {
6349 		if (sa->ipsa_authtmpl == IPSEC_CTX_TMPL_ALLOC)
6350 			sa->ipsa_authtmpl = NULL;
6351 		else if (sa->ipsa_authtmpl != NULL) {
6352 			crypto_destroy_ctx_template(sa->ipsa_authtmpl);
6353 			sa->ipsa_authtmpl = NULL;
6354 		}
6355 	} else {
6356 		ASSERT(alg_type == IPSEC_ALG_ENCR);
6357 		if (sa->ipsa_encrtmpl == IPSEC_CTX_TMPL_ALLOC)
6358 			sa->ipsa_encrtmpl = NULL;
6359 		else if (sa->ipsa_encrtmpl != NULL) {
6360 			crypto_destroy_ctx_template(sa->ipsa_encrtmpl);
6361 			sa->ipsa_encrtmpl = NULL;
6362 		}
6363 	}
6364 }
6365 
6366 /*
6367  * Use the kernel crypto framework to check the validity of a key received
6368  * via keysock. Returns 0 if the key is OK, -1 otherwise.
6369  */
6370 int
6371 ipsec_check_key(crypto_mech_type_t mech_type, sadb_key_t *sadb_key,
6372     boolean_t is_auth, int *diag)
6373 {
6374 	crypto_mechanism_t mech;
6375 	crypto_key_t crypto_key;
6376 	int crypto_rc;
6377 
6378 	mech.cm_type = mech_type;
6379 	mech.cm_param = NULL;
6380 	mech.cm_param_len = 0;
6381 
6382 	crypto_key.ck_format = CRYPTO_KEY_RAW;
6383 	crypto_key.ck_data = sadb_key + 1;
6384 	crypto_key.ck_length = sadb_key->sadb_key_bits;
6385 
6386 	crypto_rc = crypto_key_check(&mech, &crypto_key);
6387 
6388 	switch (crypto_rc) {
6389 	case CRYPTO_SUCCESS:
6390 		return (0);
6391 	case CRYPTO_MECHANISM_INVALID:
6392 	case CRYPTO_MECH_NOT_SUPPORTED:
6393 		*diag = is_auth ? SADB_X_DIAGNOSTIC_BAD_AALG :
6394 		    SADB_X_DIAGNOSTIC_BAD_EALG;
6395 		break;
6396 	case CRYPTO_KEY_SIZE_RANGE:
6397 		*diag = is_auth ? SADB_X_DIAGNOSTIC_BAD_AKEYBITS :
6398 		    SADB_X_DIAGNOSTIC_BAD_EKEYBITS;
6399 		break;
6400 	case CRYPTO_WEAK_KEY:
6401 		*diag = is_auth ? SADB_X_DIAGNOSTIC_WEAK_AKEY :
6402 		    SADB_X_DIAGNOSTIC_WEAK_EKEY;
6403 		break;
6404 	}
6405 
6406 	return (-1);
6407 }
6408