xref: /titanic_51/usr/src/uts/common/inet/ip/sadb.c (revision aa1b14e7d68925d80eebf0ce8ed0b9cc55246546)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/stream.h>
28 #include <sys/stropts.h>
29 #include <sys/strsubr.h>
30 #include <sys/errno.h>
31 #include <sys/ddi.h>
32 #include <sys/debug.h>
33 #include <sys/cmn_err.h>
34 #include <sys/stream.h>
35 #include <sys/strlog.h>
36 #include <sys/kmem.h>
37 #include <sys/sunddi.h>
38 #include <sys/tihdr.h>
39 #include <sys/atomic.h>
40 #include <sys/socket.h>
41 #include <sys/sysmacros.h>
42 #include <sys/crypto/common.h>
43 #include <sys/crypto/api.h>
44 #include <sys/zone.h>
45 #include <netinet/in.h>
46 #include <net/if.h>
47 #include <net/pfkeyv2.h>
48 #include <inet/common.h>
49 #include <netinet/ip6.h>
50 #include <inet/ip.h>
51 #include <inet/ip_ire.h>
52 #include <inet/ip6.h>
53 #include <inet/ipsec_info.h>
54 #include <inet/tcp.h>
55 #include <inet/sadb.h>
56 #include <inet/ipsec_impl.h>
57 #include <inet/ipsecah.h>
58 #include <inet/ipsecesp.h>
59 #include <sys/random.h>
60 #include <sys/dlpi.h>
61 #include <sys/iphada.h>
62 #include <inet/ip_if.h>
63 #include <inet/ipdrop.h>
64 #include <inet/ipclassifier.h>
65 #include <inet/sctp_ip.h>
66 #include <inet/tun.h>
67 
68 /*
69  * This source file contains Security Association Database (SADB) common
70  * routines.  They are linked in with the AH module.  Since AH has no chance
71  * of falling under export control, it was safe to link it in there.
72  */
73 
/*
 * Prototypes for file-local (static) functions.
 */
static mblk_t *sadb_extended_acquire(ipsec_selector_t *, ipsec_policy_t *,
    ipsec_action_t *, boolean_t, uint32_t, uint32_t, netstack_t *);
static void sadb_ill_df(ill_t *, mblk_t *, isaf_t *, int, boolean_t);
static ipsa_t *sadb_torch_assoc(isaf_t *, ipsa_t *, boolean_t, mblk_t **);
static void sadb_drain_torchq(queue_t *, mblk_t *);
static void sadb_destroy_acqlist(iacqf_t **, uint_t, boolean_t,
			    netstack_t *);
static void sadb_destroy(sadb_t *, netstack_t *);
static mblk_t *sadb_sa2msg(ipsa_t *, sadb_msg_t *);

static time_t sadb_add_time(time_t, uint64_t);
static void lifetime_fuzz(ipsa_t *);
static void age_pair_peer_list(templist_t *, sadb_t *, boolean_t);
static void ipsa_set_replay(ipsa_t *ipsa, uint32_t offset);

/*
 * SPI hooks declared here and defined elsewhere.  They are function
 * pointers; code in this file tests cl_inet_deletespi for being non-NULL
 * before calling through it.
 * NOTE(review): the "cl_" prefix suggests these are clustering hooks that
 * are only set when clustering is active -- confirm against the definition.
 */
extern void (*cl_inet_getspi)(netstackid_t stack_id, uint8_t protocol,
    uint8_t *ptr, size_t len, void *args);
extern int (*cl_inet_checkspi)(netstackid_t stack_id, uint8_t protocol,
    uint32_t spi, void *args);
extern void (*cl_inet_deletespi)(netstackid_t stack_id, uint8_t protocol,
    uint32_t spi, void *args);

/*
 * ipsacq_maxpackets is defined here to make it tunable
 * from /etc/system.
 */
extern uint64_t ipsacq_maxpackets;
101 
/*
 * Expiration-time helpers.  The "delta" and "exp" arguments are pasted onto
 * the "ipsa_" prefix, so passing foo selects the field (sa)->ipsa_foo.
 *
 * SET_EXPIRE: if the selected lifetime (delta) is non-zero, set the selected
 * expiration field to ipsa_addtime + lifetime, computed with
 * sadb_add_time().  A zero lifetime leaves the expiration field untouched.
 *
 * UPDATE_EXPIRE: if the selected lifetime is non-zero, compute
 * ipsa_usetime + lifetime with sadb_add_time().  Store it directly when the
 * expiration field is still 0; otherwise keep the earlier (MIN) of the
 * existing expiration and the newly computed one.
 *
 * Both macros expand to a bare brace block and evaluate "sa" more than once,
 * so pass a side-effect-free expression and do not use them as the
 * unbraced body of an if that has an else.
 */
#define	SET_EXPIRE(sa, delta, exp) {				\
	if (((sa)->ipsa_ ## delta) != 0) {				\
		(sa)->ipsa_ ## exp = sadb_add_time((sa)->ipsa_addtime,	\
			(sa)->ipsa_ ## delta);				\
	}								\
}

#define	UPDATE_EXPIRE(sa, delta, exp) {					\
	if (((sa)->ipsa_ ## delta) != 0) {				\
		time_t tmp = sadb_add_time((sa)->ipsa_usetime,		\
			(sa)->ipsa_ ## delta);				\
		if (((sa)->ipsa_ ## exp) == 0)				\
			(sa)->ipsa_ ## exp = tmp;			\
		else							\
			(sa)->ipsa_ ## exp = 				\
			    MIN((sa)->ipsa_ ## exp, tmp); 		\
	}								\
}
120 
121 
122 /* wrap the macro so we can pass it as a function pointer */
123 void
124 sadb_sa_refrele(void *target)
125 {
126 	IPSA_REFRELE(((ipsa_t *)target));
127 }
128 
/*
 * We presume that sizeof (long) == sizeof (time_t) and that time_t is
 * a signed type.
 */
#define	TIME_MAX LONG_MAX

/*
 * Saturating add of a lifetime to a base time.
 *
 * PF_KEY gives us lifetimes in uint64_t seconds.  We presume that
 * time_t is defined to be a signed type with the same range as
 * "long".  On ILP32 systems, we thus run the risk of wrapping around
 * at end of time, as well as "overwrapping" the clock back around
 * into a seemingly valid but incorrect future date earlier than the
 * desired expiration.
 *
 * In order to avoid odd behavior (either negative lifetimes or loss
 * of high order bits) when someone asks for bizarrely long SA
 * lifetimes, expire times are computed with a saturating add: the
 * result is base + delta, or TIME_MAX if that sum does not fit in a
 * time_t.
 *
 * The overflow test is done BEFORE the addition, by comparing delta
 * against the headroom left above base.  This avoids adding first and
 * then relying on the implementation-defined conversion of an
 * out-of-range unsigned value to a signed time_t to notice the wrap.
 *
 * We presume that ILP32 systems will be past end of support life when
 * the 32-bit time_t overflows (a dangerous assumption, mind you..).
 *
 * On LP64, 2^64 seconds are about 5.8e11 years, at which point we
 * will hopefully have figured out clever ways to avoid the use of
 * fixed-sized integers in computation.
 */
static time_t
sadb_add_time(time_t base, uint64_t delta)
{
	/*
	 * Clip delta to the maximum possible time_t value to
	 * prevent "overwrapping" back into a shorter-than-desired
	 * future time.  After this, delta always fits in a time_t.
	 */
	if (delta > (uint64_t)TIME_MAX)
		delta = TIME_MAX;

	/*
	 * With base <= 0 the sum of base and a clipped delta cannot
	 * exceed TIME_MAX.  For a positive base, TIME_MAX - base is the
	 * remaining headroom (and cannot itself overflow); saturate if
	 * delta does not fit in it.
	 */
	if (base > 0 && delta > (uint64_t)(TIME_MAX - base))
		return (TIME_MAX);

	return (base + (time_t)delta);
}
179 
180 /*
181  * Callers of this function have already created a working security
182  * association, and have found the appropriate table & hash chain.  All this
183  * function does is check duplicates, and insert the SA.  The caller needs to
184  * hold the hash bucket lock and increment the refcnt before insertion.
185  *
186  * Return 0 if success, EEXIST if collision.
187  */
188 #define	SA_UNIQUE_MATCH(sa1, sa2) \
189 	(((sa1)->ipsa_unique_id & (sa1)->ipsa_unique_mask) == \
190 	((sa2)->ipsa_unique_id & (sa2)->ipsa_unique_mask))
191 
192 int
193 sadb_insertassoc(ipsa_t *ipsa, isaf_t *bucket)
194 {
195 	ipsa_t **ptpn = NULL;
196 	ipsa_t *walker;
197 	boolean_t unspecsrc;
198 
199 	ASSERT(MUTEX_HELD(&bucket->isaf_lock));
200 
201 	unspecsrc = IPSA_IS_ADDR_UNSPEC(ipsa->ipsa_srcaddr, ipsa->ipsa_addrfam);
202 
203 	walker = bucket->isaf_ipsa;
204 	ASSERT(walker == NULL || ipsa->ipsa_addrfam == walker->ipsa_addrfam);
205 
206 	/*
207 	 * Find insertion point (pointed to with **ptpn).  Insert at the head
208 	 * of the list unless there's an unspecified source address, then
209 	 * insert it after the last SA with a specified source address.
210 	 *
211 	 * BTW, you'll have to walk the whole chain, matching on {DST, SPI}
212 	 * checking for collisions.
213 	 */
214 
215 	while (walker != NULL) {
216 		if (IPSA_ARE_ADDR_EQUAL(walker->ipsa_dstaddr,
217 		    ipsa->ipsa_dstaddr, ipsa->ipsa_addrfam)) {
218 			if (walker->ipsa_spi == ipsa->ipsa_spi)
219 				return (EEXIST);
220 
221 			mutex_enter(&walker->ipsa_lock);
222 			if (ipsa->ipsa_state == IPSA_STATE_MATURE &&
223 			    (walker->ipsa_flags & IPSA_F_USED) &&
224 			    SA_UNIQUE_MATCH(walker, ipsa)) {
225 				walker->ipsa_flags |= IPSA_F_CINVALID;
226 			}
227 			mutex_exit(&walker->ipsa_lock);
228 		}
229 
230 		if (ptpn == NULL && unspecsrc) {
231 			if (IPSA_IS_ADDR_UNSPEC(walker->ipsa_srcaddr,
232 			    walker->ipsa_addrfam))
233 				ptpn = walker->ipsa_ptpn;
234 			else if (walker->ipsa_next == NULL)
235 				ptpn = &walker->ipsa_next;
236 		}
237 
238 		walker = walker->ipsa_next;
239 	}
240 
241 	if (ptpn == NULL)
242 		ptpn = &bucket->isaf_ipsa;
243 	ipsa->ipsa_next = *ptpn;
244 	ipsa->ipsa_ptpn = ptpn;
245 	if (ipsa->ipsa_next != NULL)
246 		ipsa->ipsa_next->ipsa_ptpn = &ipsa->ipsa_next;
247 	*ptpn = ipsa;
248 	ipsa->ipsa_linklock = &bucket->isaf_lock;
249 
250 	return (0);
251 }
252 #undef SA_UNIQUE_MATCH
253 
254 /*
255  * Free a security association.  Its reference count is 0, which means
256  * I must free it.  The SA must be unlocked and must not be linked into
257  * any fanout list.
258  */
259 static void
260 sadb_freeassoc(ipsa_t *ipsa)
261 {
262 	ipsec_stack_t	*ipss = ipsa->ipsa_netstack->netstack_ipsec;
263 
264 	ASSERT(ipss != NULL);
265 	ASSERT(MUTEX_NOT_HELD(&ipsa->ipsa_lock));
266 	ASSERT(ipsa->ipsa_refcnt == 0);
267 	ASSERT(ipsa->ipsa_next == NULL);
268 	ASSERT(ipsa->ipsa_ptpn == NULL);
269 
270 	mutex_enter(&ipsa->ipsa_lock);
271 	/* Don't call sadb_clear_lpkt() since we hold the ipsa_lock anyway. */
272 	ip_drop_packet(ipsa->ipsa_lpkt, B_TRUE, NULL, NULL,
273 	    DROPPER(ipss, ipds_sadb_inlarval_timeout),
274 	    &ipss->ipsec_sadb_dropper);
275 	ipsec_destroy_ctx_tmpl(ipsa, IPSEC_ALG_AUTH);
276 	ipsec_destroy_ctx_tmpl(ipsa, IPSEC_ALG_ENCR);
277 	mutex_exit(&ipsa->ipsa_lock);
278 
279 	/* bzero() these fields for paranoia's sake. */
280 	if (ipsa->ipsa_authkey != NULL) {
281 		bzero(ipsa->ipsa_authkey, ipsa->ipsa_authkeylen);
282 		kmem_free(ipsa->ipsa_authkey, ipsa->ipsa_authkeylen);
283 	}
284 	if (ipsa->ipsa_encrkey != NULL) {
285 		bzero(ipsa->ipsa_encrkey, ipsa->ipsa_encrkeylen);
286 		kmem_free(ipsa->ipsa_encrkey, ipsa->ipsa_encrkeylen);
287 	}
288 	if (ipsa->ipsa_src_cid != NULL) {
289 		IPSID_REFRELE(ipsa->ipsa_src_cid);
290 	}
291 	if (ipsa->ipsa_dst_cid != NULL) {
292 		IPSID_REFRELE(ipsa->ipsa_dst_cid);
293 	}
294 	if (ipsa->ipsa_integ != NULL)
295 		kmem_free(ipsa->ipsa_integ, ipsa->ipsa_integlen);
296 	if (ipsa->ipsa_sens != NULL)
297 		kmem_free(ipsa->ipsa_sens, ipsa->ipsa_senslen);
298 
299 	mutex_destroy(&ipsa->ipsa_lock);
300 	kmem_free(ipsa, sizeof (*ipsa));
301 }
302 
303 /*
304  * Unlink a security association from a hash bucket.  Assume the hash bucket
305  * lock is held, but the association's lock is not.
306  *
307  * Note that we do not bump the bucket's generation number here because
308  * we might not be making a visible change to the set of visible SA's.
309  * All callers MUST bump the bucket's generation number before they unlock
310  * the bucket if they use sadb_unlinkassoc to permanetly remove an SA which
311  * was present in the bucket at the time it was locked.
312  */
313 void
314 sadb_unlinkassoc(ipsa_t *ipsa)
315 {
316 	ASSERT(ipsa->ipsa_linklock != NULL);
317 	ASSERT(MUTEX_HELD(ipsa->ipsa_linklock));
318 
319 	/* These fields are protected by the link lock. */
320 	*(ipsa->ipsa_ptpn) = ipsa->ipsa_next;
321 	if (ipsa->ipsa_next != NULL) {
322 		ipsa->ipsa_next->ipsa_ptpn = ipsa->ipsa_ptpn;
323 		ipsa->ipsa_next = NULL;
324 	}
325 
326 	ipsa->ipsa_ptpn = NULL;
327 
328 	/* This may destroy the SA. */
329 	IPSA_REFRELE(ipsa);
330 }
331 
332 void
333 sadb_delete_cluster(ipsa_t *assoc)
334 {
335 	uint8_t protocol;
336 
337 	if (cl_inet_deletespi &&
338 	    ((assoc->ipsa_state == IPSA_STATE_LARVAL) ||
339 	    (assoc->ipsa_state == IPSA_STATE_MATURE))) {
340 		protocol = (assoc->ipsa_type == SADB_SATYPE_AH) ?
341 		    IPPROTO_AH : IPPROTO_ESP;
342 		cl_inet_deletespi(assoc->ipsa_netstack->netstack_stackid,
343 		    protocol, assoc->ipsa_spi, NULL);
344 	}
345 }
346 
347 /*
348  * Create a larval security association with the specified SPI.	 All other
349  * fields are zeroed.
350  */
351 static ipsa_t *
352 sadb_makelarvalassoc(uint32_t spi, uint32_t *src, uint32_t *dst, int addrfam,
353     netstack_t *ns)
354 {
355 	ipsa_t *newbie;
356 
357 	/*
358 	 * Allocate...
359 	 */
360 
361 	newbie = (ipsa_t *)kmem_zalloc(sizeof (ipsa_t), KM_NOSLEEP);
362 	if (newbie == NULL) {
363 		/* Can't make new larval SA. */
364 		return (NULL);
365 	}
366 
367 	/* Assigned requested SPI, assume caller does SPI allocation magic. */
368 	newbie->ipsa_spi = spi;
369 	newbie->ipsa_netstack = ns;	/* No netstack_hold */
370 
371 	/*
372 	 * Copy addresses...
373 	 */
374 
375 	IPSA_COPY_ADDR(newbie->ipsa_srcaddr, src, addrfam);
376 	IPSA_COPY_ADDR(newbie->ipsa_dstaddr, dst, addrfam);
377 
378 	newbie->ipsa_addrfam = addrfam;
379 
380 	/*
381 	 * Set common initialization values, including refcnt.
382 	 */
383 	mutex_init(&newbie->ipsa_lock, NULL, MUTEX_DEFAULT, NULL);
384 	newbie->ipsa_state = IPSA_STATE_LARVAL;
385 	newbie->ipsa_refcnt = 1;
386 	newbie->ipsa_freefunc = sadb_freeassoc;
387 
388 	/*
389 	 * There aren't a lot of other common initialization values, as
390 	 * they are copied in from the PF_KEY message.
391 	 */
392 
393 	return (newbie);
394 }
395 
396 /*
397  * Call me to initialize a security association fanout.
398  */
399 static int
400 sadb_init_fanout(isaf_t **tablep, uint_t size, int kmflag)
401 {
402 	isaf_t *table;
403 	int i;
404 
405 	table = (isaf_t *)kmem_alloc(size * sizeof (*table), kmflag);
406 	*tablep = table;
407 
408 	if (table == NULL)
409 		return (ENOMEM);
410 
411 	for (i = 0; i < size; i++) {
412 		mutex_init(&(table[i].isaf_lock), NULL, MUTEX_DEFAULT, NULL);
413 		table[i].isaf_ipsa = NULL;
414 		table[i].isaf_gen = 0;
415 	}
416 
417 	return (0);
418 }
419 
420 /*
421  * Call me to initialize an acquire fanout
422  */
423 static int
424 sadb_init_acfanout(iacqf_t **tablep, uint_t size, int kmflag)
425 {
426 	iacqf_t *table;
427 	int i;
428 
429 	table = (iacqf_t *)kmem_alloc(size * sizeof (*table), kmflag);
430 	*tablep = table;
431 
432 	if (table == NULL)
433 		return (ENOMEM);
434 
435 	for (i = 0; i < size; i++) {
436 		mutex_init(&(table[i].iacqf_lock), NULL, MUTEX_DEFAULT, NULL);
437 		table[i].iacqf_ipsacq = NULL;
438 	}
439 
440 	return (0);
441 }
442 
443 /*
444  * Attempt to initialize an SADB instance.  On failure, return ENOMEM;
445  * caller must clean up partial allocations.
446  */
447 static int
448 sadb_init_trial(sadb_t *sp, uint_t size, int kmflag)
449 {
450 	ASSERT(sp->sdb_of == NULL);
451 	ASSERT(sp->sdb_if == NULL);
452 	ASSERT(sp->sdb_acq == NULL);
453 
454 	sp->sdb_hashsize = size;
455 	if (sadb_init_fanout(&sp->sdb_of, size, kmflag) != 0)
456 		return (ENOMEM);
457 	if (sadb_init_fanout(&sp->sdb_if, size, kmflag) != 0)
458 		return (ENOMEM);
459 	if (sadb_init_acfanout(&sp->sdb_acq, size, kmflag) != 0)
460 		return (ENOMEM);
461 
462 	return (0);
463 }
464 
465 /*
466  * Call me to initialize an SADB instance; fall back to default size on failure.
467  */
468 static void
469 sadb_init(const char *name, sadb_t *sp, uint_t size, uint_t ver,
470     netstack_t *ns)
471 {
472 	ASSERT(sp->sdb_of == NULL);
473 	ASSERT(sp->sdb_if == NULL);
474 	ASSERT(sp->sdb_acq == NULL);
475 
476 	if (size < IPSEC_DEFAULT_HASH_SIZE)
477 		size = IPSEC_DEFAULT_HASH_SIZE;
478 
479 	if (sadb_init_trial(sp, size, KM_NOSLEEP) != 0) {
480 
481 		cmn_err(CE_WARN,
482 		    "Unable to allocate %u entry IPv%u %s SADB hash table",
483 		    size, ver, name);
484 
485 		sadb_destroy(sp, ns);
486 		size = IPSEC_DEFAULT_HASH_SIZE;
487 		cmn_err(CE_WARN, "Falling back to %d entries", size);
488 		(void) sadb_init_trial(sp, size, KM_SLEEP);
489 	}
490 }
491 
492 
493 /*
494  * Initialize an SADB-pair.
495  */
496 void
497 sadbp_init(const char *name, sadbp_t *sp, int type, int size, netstack_t *ns)
498 {
499 	sadb_init(name, &sp->s_v4, size, 4, ns);
500 	sadb_init(name, &sp->s_v6, size, 6, ns);
501 
502 	sp->s_satype = type;
503 
504 	ASSERT((type == SADB_SATYPE_AH) || (type == SADB_SATYPE_ESP));
505 	if (type == SADB_SATYPE_AH) {
506 		ipsec_stack_t	*ipss = ns->netstack_ipsec;
507 
508 		ip_drop_register(&ipss->ipsec_sadb_dropper, "IPsec SADB");
509 		sp->s_addflags = AH_ADD_SETTABLE_FLAGS;
510 		sp->s_updateflags = AH_UPDATE_SETTABLE_FLAGS;
511 	} else {
512 		sp->s_addflags = ESP_ADD_SETTABLE_FLAGS;
513 		sp->s_updateflags = ESP_UPDATE_SETTABLE_FLAGS;
514 	}
515 }
516 
517 /*
518  * Deliver a single SADB_DUMP message representing a single SA.  This is
519  * called many times by sadb_dump().
520  *
521  * If the return value of this is ENOBUFS (not the same as ENOMEM), then
522  * the caller should take that as a hint that dupb() on the "original answer"
523  * failed, and that perhaps the caller should try again with a copyb()ed
524  * "original answer".
525  */
526 static int
527 sadb_dump_deliver(queue_t *pfkey_q, mblk_t *original_answer, ipsa_t *ipsa,
528     sadb_msg_t *samsg)
529 {
530 	mblk_t *answer;
531 
532 	answer = dupb(original_answer);
533 	if (answer == NULL)
534 		return (ENOBUFS);
535 	answer->b_cont = sadb_sa2msg(ipsa, samsg);
536 	if (answer->b_cont == NULL) {
537 		freeb(answer);
538 		return (ENOMEM);
539 	}
540 
541 	/* Just do a putnext, and let keysock deal with flow control. */
542 	putnext(pfkey_q, answer);
543 	return (0);
544 }
545 
546 /*
547  * Common function to allocate and prepare a keysock_out_t M_CTL message.
548  */
549 mblk_t *
550 sadb_keysock_out(minor_t serial)
551 {
552 	mblk_t *mp;
553 	keysock_out_t *kso;
554 
555 	mp = allocb(sizeof (ipsec_info_t), BPRI_HI);
556 	if (mp != NULL) {
557 		mp->b_datap->db_type = M_CTL;
558 		mp->b_wptr += sizeof (ipsec_info_t);
559 		kso = (keysock_out_t *)mp->b_rptr;
560 		kso->ks_out_type = KEYSOCK_OUT;
561 		kso->ks_out_len = sizeof (*kso);
562 		kso->ks_out_serial = serial;
563 	}
564 
565 	return (mp);
566 }
567 
568 /*
569  * Perform an SADB_DUMP, spewing out every SA in an array of SA fanouts
570  * to keysock.
571  */
572 static int
573 sadb_dump_fanout(queue_t *pfkey_q, mblk_t *mp, minor_t serial, isaf_t *fanout,
574     int num_entries, boolean_t do_peers, time_t active_time)
575 {
576 	int i, error = 0;
577 	mblk_t *original_answer;
578 	ipsa_t *walker;
579 	sadb_msg_t *samsg;
580 	time_t	current;
581 
582 	/*
583 	 * For each IPSA hash bucket do:
584 	 *	- Hold the mutex
585 	 *	- Walk each entry, doing an sadb_dump_deliver() on it.
586 	 */
587 	ASSERT(mp->b_cont != NULL);
588 	samsg = (sadb_msg_t *)mp->b_cont->b_rptr;
589 
590 	original_answer = sadb_keysock_out(serial);
591 	if (original_answer == NULL)
592 		return (ENOMEM);
593 
594 	current = gethrestime_sec();
595 	for (i = 0; i < num_entries; i++) {
596 		mutex_enter(&fanout[i].isaf_lock);
597 		for (walker = fanout[i].isaf_ipsa; walker != NULL;
598 		    walker = walker->ipsa_next) {
599 			if (!do_peers && walker->ipsa_haspeer)
600 				continue;
601 			if ((active_time != 0) &&
602 			    ((current - walker->ipsa_lastuse) > active_time))
603 				continue;
604 			error = sadb_dump_deliver(pfkey_q, original_answer,
605 			    walker, samsg);
606 			if (error == ENOBUFS) {
607 				mblk_t *new_original_answer;
608 
609 				/* Ran out of dupb's.  Try a copyb. */
610 				new_original_answer = copyb(original_answer);
611 				if (new_original_answer == NULL) {
612 					error = ENOMEM;
613 				} else {
614 					freeb(original_answer);
615 					original_answer = new_original_answer;
616 					error = sadb_dump_deliver(pfkey_q,
617 					    original_answer, walker, samsg);
618 				}
619 			}
620 			if (error != 0)
621 				break;	/* out of for loop. */
622 		}
623 		mutex_exit(&fanout[i].isaf_lock);
624 		if (error != 0)
625 			break;	/* out of for loop. */
626 	}
627 
628 	freeb(original_answer);
629 	return (error);
630 }
631 
632 /*
633  * Dump an entire SADB; outbound first, then inbound.
634  */
635 
636 int
637 sadb_dump(queue_t *pfkey_q, mblk_t *mp, keysock_in_t *ksi, sadb_t *sp)
638 {
639 	int error;
640 	time_t	active_time = 0;
641 	sadb_x_edump_t	*edump =
642 	    (sadb_x_edump_t *)ksi->ks_in_extv[SADB_X_EXT_EDUMP];
643 
644 	if (edump != NULL) {
645 		active_time = edump->sadb_x_edump_timeout;
646 	}
647 
648 	/* Dump outbound */
649 	error = sadb_dump_fanout(pfkey_q, mp, ksi->ks_in_serial, sp->sdb_of,
650 	    sp->sdb_hashsize, B_TRUE, active_time);
651 	if (error)
652 		return (error);
653 
654 	/* Dump inbound */
655 	return sadb_dump_fanout(pfkey_q, mp, ksi->ks_in_serial, sp->sdb_if,
656 	    sp->sdb_hashsize, B_FALSE, active_time);
657 }
658 
659 /*
660  * Generic sadb table walker.
661  *
662  * Call "walkfn" for each SA in each bucket in "table"; pass the
663  * bucket, the entry and "cookie" to the callback function.
664  * Take care to ensure that walkfn can delete the SA without screwing
665  * up our traverse.
666  *
667  * The bucket is locked for the duration of the callback, both so that the
668  * callback can just call sadb_unlinkassoc() when it wants to delete something,
669  * and so that no new entries are added while we're walking the list.
670  */
671 static void
672 sadb_walker(isaf_t *table, uint_t numentries,
673     void (*walkfn)(isaf_t *head, ipsa_t *entry, void *cookie),
674     void *cookie)
675 {
676 	int i;
677 	for (i = 0; i < numentries; i++) {
678 		ipsa_t *entry, *next;
679 
680 		mutex_enter(&table[i].isaf_lock);
681 
682 		for (entry = table[i].isaf_ipsa; entry != NULL;
683 		    entry = next) {
684 			next = entry->ipsa_next;
685 			(*walkfn)(&table[i], entry, cookie);
686 		}
687 		mutex_exit(&table[i].isaf_lock);
688 	}
689 }
690 
691 /*
692  * From the given SA, construct a dl_ct_ipsec_key and
693  * a dl_ct_ipsec structures to be sent to the adapter as part
694  * of a DL_CONTROL_REQ.
695  *
696  * ct_sa must point to the storage allocated for the key
697  * structure and must be followed by storage allocated
698  * for the SA information that must be sent to the driver
699  * as part of the DL_CONTROL_REQ request.
700  *
701  * The is_inbound boolean indicates whether the specified
702  * SA is part of an inbound SA table.
703  *
704  * Returns B_TRUE if the corresponding SA must be passed to
705  * a provider, B_FALSE otherwise; frees *mp if it returns B_FALSE.
706  */
707 static boolean_t
708 sadb_req_from_sa(ipsa_t *sa, mblk_t *mp, boolean_t is_inbound)
709 {
710 	dl_ct_ipsec_key_t *keyp;
711 	dl_ct_ipsec_t *sap;
712 	void *ct_sa = mp->b_wptr;
713 
714 	ASSERT(MUTEX_HELD(&sa->ipsa_lock));
715 
716 	keyp = (dl_ct_ipsec_key_t *)(ct_sa);
717 	sap = (dl_ct_ipsec_t *)(keyp + 1);
718 
719 	IPSECHW_DEBUG(IPSECHW_CAPAB, ("sadb_req_from_sa: "
720 	    "is_inbound = %d\n", is_inbound));
721 
722 	/* initialize flag */
723 	sap->sadb_sa_flags = 0;
724 	if (is_inbound) {
725 		sap->sadb_sa_flags |= DL_CT_IPSEC_INBOUND;
726 		/*
727 		 * If an inbound SA has a peer, then mark it has being
728 		 * an outbound SA as well.
729 		 */
730 		if (sa->ipsa_haspeer)
731 			sap->sadb_sa_flags |= DL_CT_IPSEC_OUTBOUND;
732 	} else {
733 		/*
734 		 * If an outbound SA has a peer, then don't send it,
735 		 * since we will send the copy from the inbound table.
736 		 */
737 		if (sa->ipsa_haspeer) {
738 			freemsg(mp);
739 			return (B_FALSE);
740 		}
741 		sap->sadb_sa_flags |= DL_CT_IPSEC_OUTBOUND;
742 	}
743 
744 	keyp->dl_key_spi = sa->ipsa_spi;
745 	bcopy(sa->ipsa_dstaddr, keyp->dl_key_dest_addr,
746 	    DL_CTL_IPSEC_ADDR_LEN);
747 	keyp->dl_key_addr_family = sa->ipsa_addrfam;
748 
749 	sap->sadb_sa_auth = sa->ipsa_auth_alg;
750 	sap->sadb_sa_encrypt = sa->ipsa_encr_alg;
751 
752 	sap->sadb_key_len_a = sa->ipsa_authkeylen;
753 	sap->sadb_key_bits_a = sa->ipsa_authkeybits;
754 	bcopy(sa->ipsa_authkey,
755 	    sap->sadb_key_data_a, sap->sadb_key_len_a);
756 
757 	sap->sadb_key_len_e = sa->ipsa_encrkeylen;
758 	sap->sadb_key_bits_e = sa->ipsa_encrkeybits;
759 	bcopy(sa->ipsa_encrkey,
760 	    sap->sadb_key_data_e, sap->sadb_key_len_e);
761 
762 	mp->b_wptr += sizeof (dl_ct_ipsec_t) + sizeof (dl_ct_ipsec_key_t);
763 	return (B_TRUE);
764 }
765 
766 /*
767  * Called from AH or ESP to format a message which will be used to inform
768  * IPsec-acceleration-capable ills of a SADB change.
769  * (It is not possible to send the message to IP directly from this function
770  * since the SA, if any, is locked during the call).
771  *
772  * dl_operation: DL_CONTROL_REQ operation (add, delete, update, etc)
773  * sa_type: identifies whether the operation applies to AH or ESP
774  *	(must be one of SADB_SATYPE_AH or SADB_SATYPE_ESP)
775  * sa: Pointer to an SA.  Must be non-NULL and locked
776  *	for ADD, DELETE, GET, and UPDATE operations.
777  * This function returns an mblk chain that must be passed to IP
778  * for forwarding to the IPsec capable providers.
779  */
780 mblk_t *
781 sadb_fmt_sa_req(uint_t dl_operation, uint_t sa_type, ipsa_t *sa,
782     boolean_t is_inbound)
783 {
784 	mblk_t *mp;
785 	dl_control_req_t *ctrl;
786 	boolean_t need_key = B_FALSE;
787 	mblk_t *ctl_mp = NULL;
788 	ipsec_ctl_t *ctl;
789 
790 	/*
791 	 * 1 allocate and initialize DL_CONTROL_REQ M_PROTO
792 	 * 2 if a key is needed for the operation
793 	 *    2.1 initialize key
794 	 *    2.2 if a full SA is needed for the operation
795 	 *	2.2.1 initialize full SA info
796 	 * 3 return message; caller will call ill_ipsec_capab_send_all()
797 	 * to send the resulting message to IPsec capable ills.
798 	 */
799 
800 	ASSERT(sa_type == SADB_SATYPE_AH || sa_type == SADB_SATYPE_ESP);
801 
802 	/*
803 	 * Allocate DL_CONTROL_REQ M_PROTO
804 	 * We allocate room for the SA even if it's not needed
805 	 * by some of the operations (for example flush)
806 	 */
807 	mp = allocb(sizeof (dl_control_req_t) +
808 	    sizeof (dl_ct_ipsec_key_t) + sizeof (dl_ct_ipsec_t), BPRI_HI);
809 	if (mp == NULL)
810 		return (NULL);
811 	mp->b_datap->db_type = M_PROTO;
812 
813 	/* initialize dl_control_req_t */
814 	ctrl = (dl_control_req_t *)mp->b_wptr;
815 	ctrl->dl_primitive = DL_CONTROL_REQ;
816 	ctrl->dl_operation = dl_operation;
817 	ctrl->dl_type = sa_type == SADB_SATYPE_AH ? DL_CT_IPSEC_AH :
818 	    DL_CT_IPSEC_ESP;
819 	ctrl->dl_key_offset = sizeof (dl_control_req_t);
820 	ctrl->dl_key_length = sizeof (dl_ct_ipsec_key_t);
821 	ctrl->dl_data_offset = sizeof (dl_control_req_t) +
822 	    sizeof (dl_ct_ipsec_key_t);
823 	ctrl->dl_data_length = sizeof (dl_ct_ipsec_t);
824 	mp->b_wptr += sizeof (dl_control_req_t);
825 
826 	if ((dl_operation == DL_CO_SET) || (dl_operation == DL_CO_DELETE)) {
827 		ASSERT(sa != NULL);
828 		ASSERT(MUTEX_HELD(&sa->ipsa_lock));
829 
830 		need_key = B_TRUE;
831 
832 		/*
833 		 * Initialize key and SA data. Note that for some
834 		 * operations the SA data is ignored by the provider
835 		 * (delete, etc.)
836 		 */
837 		if (!sadb_req_from_sa(sa, mp, is_inbound))
838 			return (NULL);
839 	}
840 
841 	/* construct control message */
842 	ctl_mp = allocb(sizeof (ipsec_ctl_t), BPRI_HI);
843 	if (ctl_mp == NULL) {
844 		cmn_err(CE_WARN, "sadb_fmt_sa_req: allocb failed\n");
845 		freemsg(mp);
846 		return (NULL);
847 	}
848 
849 	ctl_mp->b_datap->db_type = M_CTL;
850 	ctl_mp->b_wptr += sizeof (ipsec_ctl_t);
851 	ctl_mp->b_cont = mp;
852 
853 	ctl = (ipsec_ctl_t *)ctl_mp->b_rptr;
854 	ctl->ipsec_ctl_type = IPSEC_CTL;
855 	ctl->ipsec_ctl_len  = sizeof (ipsec_ctl_t);
856 	ctl->ipsec_ctl_sa_type = sa_type;
857 
858 	if (need_key) {
859 		/*
860 		 * Keep an additional reference on SA, since it will be
861 		 * needed by IP to send control messages corresponding
862 		 * to that SA from its perimeter. IP will do a
863 		 * IPSA_REFRELE when done with the request.
864 		 */
865 		ASSERT(MUTEX_HELD(&sa->ipsa_lock));
866 		IPSA_REFHOLD(sa);
867 		ctl->ipsec_ctl_sa = sa;
868 	} else
869 		ctl->ipsec_ctl_sa = NULL;
870 
871 	return (ctl_mp);
872 }
873 
874 
875 /*
876  * Called by sadb_ill_download() to dump the entries for a specific
877  * fanout table.  For each SA entry in the table passed as argument,
878  * use mp as a template and constructs a full DL_CONTROL message, and
879  * call ill_dlpi_send(), provided by IP, to send the resulting
880  * messages to the ill.
881  */
882 static void
883 sadb_ill_df(ill_t *ill, mblk_t *mp, isaf_t *fanout, int num_entries,
884     boolean_t is_inbound)
885 {
886 	ipsa_t *walker;
887 	mblk_t *nmp, *salist;
888 	int i, error = 0;
889 	ip_stack_t	*ipst = ill->ill_ipst;
890 	netstack_t	*ns = ipst->ips_netstack;
891 
892 	IPSECHW_DEBUG(IPSECHW_SADB, ("sadb_ill_df: fanout at 0x%p ne=%d\n",
893 	    (void *)fanout, num_entries));
894 	/*
895 	 * For each IPSA hash bucket do:
896 	 *	- Hold the mutex
897 	 *	- Walk each entry, sending a corresponding request to IP
898 	 *	  for it.
899 	 */
900 	ASSERT(mp->b_datap->db_type == M_PROTO);
901 
902 	for (i = 0; i < num_entries; i++) {
903 		mutex_enter(&fanout[i].isaf_lock);
904 		salist = NULL;
905 
906 		for (walker = fanout[i].isaf_ipsa; walker != NULL;
907 		    walker = walker->ipsa_next) {
908 			IPSECHW_DEBUG(IPSECHW_SADB,
909 			    ("sadb_ill_df: sending SA to ill via IP \n"));
910 			/*
911 			 * Duplicate the template mp passed and
912 			 * complete DL_CONTROL_REQ data.
913 			 * To be more memory efficient, we could use
914 			 * dupb() for the M_CTL and copyb() for the M_PROTO
915 			 * as the M_CTL, since the M_CTL is the same for
916 			 * every SA entry passed down to IP for the same ill.
917 			 *
918 			 * Note that copymsg/copyb ensure that the new mblk
919 			 * is at least as large as the source mblk even if it's
920 			 * not using all its storage -- therefore, nmp
921 			 * has trailing space for sadb_req_from_sa to add
922 			 * the SA-specific bits.
923 			 */
924 			mutex_enter(&walker->ipsa_lock);
925 			if (ipsec_capab_match(ill,
926 			    ill->ill_phyint->phyint_ifindex, ill->ill_isv6,
927 			    walker, ns)) {
928 				nmp = copymsg(mp);
929 				if (nmp == NULL) {
930 					IPSECHW_DEBUG(IPSECHW_SADB,
931 					    ("sadb_ill_df: alloc error\n"));
932 					error = ENOMEM;
933 					mutex_exit(&walker->ipsa_lock);
934 					break;
935 				}
936 				if (sadb_req_from_sa(walker, nmp, is_inbound)) {
937 					nmp->b_next = salist;
938 					salist = nmp;
939 				}
940 			}
941 			mutex_exit(&walker->ipsa_lock);
942 		}
943 		mutex_exit(&fanout[i].isaf_lock);
944 		while (salist != NULL) {
945 			nmp = salist;
946 			salist = nmp->b_next;
947 			nmp->b_next = NULL;
948 			ill_dlpi_send(ill, nmp);
949 		}
950 		if (error != 0)
951 			break;	/* out of for loop. */
952 	}
953 }
954 
955 /*
956  * Called by ill_ipsec_capab_add(). Sends a copy of the SADB of
957  * the type specified by sa_type to the specified ill.
958  *
959  * We call for each fanout table defined by the SADB (one per
960  * protocol). sadb_ill_df() finally calls ill_dlpi_send() for
961  * each SADB entry in order to send a corresponding DL_CONTROL_REQ
962  * message to the ill.
963  */
964 void
965 sadb_ill_download(ill_t *ill, uint_t sa_type)
966 {
967 	mblk_t *protomp;	/* prototype message */
968 	dl_control_req_t *ctrl;
969 	sadbp_t *spp;
970 	sadb_t *sp;
971 	int dlt;
972 	ip_stack_t	*ipst = ill->ill_ipst;
973 	netstack_t	*ns = ipst->ips_netstack;
974 
975 	ASSERT(sa_type == SADB_SATYPE_AH || sa_type == SADB_SATYPE_ESP);
976 
977 	/*
978 	 * Allocate and initialize prototype answer. A duplicate for
979 	 * each SA is sent down to the interface.
980 	 */
981 
982 	/* DL_CONTROL_REQ M_PROTO mblk_t */
983 	protomp = allocb(sizeof (dl_control_req_t) +
984 	    sizeof (dl_ct_ipsec_key_t) + sizeof (dl_ct_ipsec_t), BPRI_HI);
985 	if (protomp == NULL)
986 		return;
987 	protomp->b_datap->db_type = M_PROTO;
988 
989 	dlt = (sa_type == SADB_SATYPE_AH) ? DL_CT_IPSEC_AH : DL_CT_IPSEC_ESP;
990 	if (sa_type == SADB_SATYPE_ESP) {
991 		ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
992 
993 		spp = &espstack->esp_sadb;
994 	} else {
995 		ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
996 
997 		spp = &ahstack->ah_sadb;
998 	}
999 
1000 	ctrl = (dl_control_req_t *)protomp->b_wptr;
1001 	ctrl->dl_primitive = DL_CONTROL_REQ;
1002 	ctrl->dl_operation = DL_CO_SET;
1003 	ctrl->dl_type = dlt;
1004 	ctrl->dl_key_offset = sizeof (dl_control_req_t);
1005 	ctrl->dl_key_length = sizeof (dl_ct_ipsec_key_t);
1006 	ctrl->dl_data_offset = sizeof (dl_control_req_t) +
1007 	    sizeof (dl_ct_ipsec_key_t);
1008 	ctrl->dl_data_length = sizeof (dl_ct_ipsec_t);
1009 	protomp->b_wptr += sizeof (dl_control_req_t);
1010 
1011 	/*
1012 	 * then for each SADB entry, we fill out the dl_ct_ipsec_key_t
1013 	 * and dl_ct_ipsec_t
1014 	 */
1015 	sp = ill->ill_isv6 ? &(spp->s_v6) : &(spp->s_v4);
1016 	sadb_ill_df(ill, protomp, sp->sdb_of, sp->sdb_hashsize, B_FALSE);
1017 	sadb_ill_df(ill, protomp, sp->sdb_if, sp->sdb_hashsize, B_TRUE);
1018 	freemsg(protomp);
1019 }
1020 
1021 /*
1022  * Call me to free up a security association fanout.  Use the forever
1023  * variable to indicate freeing up the SAs (forever == B_FALSE, e.g.
1024  * an SADB_FLUSH message), or destroying everything (forever == B_TRUE,
1025  * when a module is unloaded).
1026  */
1027 static void
1028 sadb_destroyer(isaf_t **tablep, uint_t numentries, boolean_t forever,
1029     boolean_t inbound)
1030 {
1031 	int i;
1032 	isaf_t *table = *tablep;
1033 	uint8_t protocol;
1034 	ipsa_t *sa;
1035 	netstackid_t sid;
1036 
1037 	if (table == NULL)
1038 		return;
1039 
1040 	for (i = 0; i < numentries; i++) {
1041 		mutex_enter(&table[i].isaf_lock);
1042 		while ((sa = table[i].isaf_ipsa) != NULL) {
1043 			if (inbound && cl_inet_deletespi &&
1044 			    (sa->ipsa_state != IPSA_STATE_ACTIVE_ELSEWHERE) &&
1045 			    (sa->ipsa_state != IPSA_STATE_IDLE)) {
1046 				protocol = (sa->ipsa_type == SADB_SATYPE_AH) ?
1047 				    IPPROTO_AH : IPPROTO_ESP;
1048 				sid = sa->ipsa_netstack->netstack_stackid;
1049 				cl_inet_deletespi(sid, protocol, sa->ipsa_spi,
1050 				    NULL);
1051 			}
1052 			sadb_unlinkassoc(sa);
1053 		}
1054 		table[i].isaf_gen++;
1055 		mutex_exit(&table[i].isaf_lock);
1056 		if (forever)
1057 			mutex_destroy(&(table[i].isaf_lock));
1058 	}
1059 
1060 	if (forever) {
1061 		*tablep = NULL;
1062 		kmem_free(table, numentries * sizeof (*table));
1063 	}
1064 }
1065 
1066 /*
1067  * Entry points to sadb_destroyer().
1068  */
1069 static void
1070 sadb_flush(sadb_t *sp, netstack_t *ns)
1071 {
1072 	/*
1073 	 * Flush out each bucket, one at a time.  Were it not for keysock's
1074 	 * enforcement, there would be a subtlety where I could add on the
1075 	 * heels of a flush.  With keysock's enforcement, however, this
1076 	 * makes ESP's job easy.
1077 	 */
1078 	sadb_destroyer(&sp->sdb_of, sp->sdb_hashsize, B_FALSE, B_FALSE);
1079 	sadb_destroyer(&sp->sdb_if, sp->sdb_hashsize, B_FALSE, B_TRUE);
1080 
1081 	/* For each acquire, destroy it; leave the bucket mutex alone. */
1082 	sadb_destroy_acqlist(&sp->sdb_acq, sp->sdb_hashsize, B_FALSE, ns);
1083 }
1084 
1085 static void
1086 sadb_destroy(sadb_t *sp, netstack_t *ns)
1087 {
1088 	sadb_destroyer(&sp->sdb_of, sp->sdb_hashsize, B_TRUE, B_FALSE);
1089 	sadb_destroyer(&sp->sdb_if, sp->sdb_hashsize, B_TRUE, B_TRUE);
1090 
1091 	/* For each acquire, destroy it, including the bucket mutex. */
1092 	sadb_destroy_acqlist(&sp->sdb_acq, sp->sdb_hashsize, B_TRUE, ns);
1093 
1094 	ASSERT(sp->sdb_of == NULL);
1095 	ASSERT(sp->sdb_if == NULL);
1096 	ASSERT(sp->sdb_acq == NULL);
1097 }
1098 
1099 static void
1100 sadb_send_flush_req(sadbp_t *spp)
1101 {
1102 	mblk_t *ctl_mp;
1103 
1104 	/*
1105 	 * we've been unplumbed, or never were plumbed; don't go there.
1106 	 */
1107 	if (spp->s_ip_q == NULL)
1108 		return;
1109 
1110 	/* have IP send a flush msg to the IPsec accelerators */
1111 	ctl_mp = sadb_fmt_sa_req(DL_CO_FLUSH, spp->s_satype, NULL, B_TRUE);
1112 	if (ctl_mp != NULL)
1113 		putnext(spp->s_ip_q, ctl_mp);
1114 }
1115 
1116 void
1117 sadbp_flush(sadbp_t *spp, netstack_t *ns)
1118 {
1119 	sadb_flush(&spp->s_v4, ns);
1120 	sadb_flush(&spp->s_v6, ns);
1121 
1122 	sadb_send_flush_req(spp);
1123 }
1124 
1125 void
1126 sadbp_destroy(sadbp_t *spp, netstack_t *ns)
1127 {
1128 	sadb_destroy(&spp->s_v4, ns);
1129 	sadb_destroy(&spp->s_v6, ns);
1130 
1131 	sadb_send_flush_req(spp);
1132 	if (spp->s_satype == SADB_SATYPE_AH) {
1133 		ipsec_stack_t	*ipss = ns->netstack_ipsec;
1134 
1135 		ip_drop_unregister(&ipss->ipsec_sadb_dropper);
1136 	}
1137 }
1138 
1139 
1140 /*
1141  * Check hard vs. soft lifetimes.  If there's a reality mismatch (e.g.
1142  * soft lifetimes > hard lifetimes) return an appropriate diagnostic for
1143  * EINVAL.
1144  */
1145 int
1146 sadb_hardsoftchk(sadb_lifetime_t *hard, sadb_lifetime_t *soft,
1147     sadb_lifetime_t *idle)
1148 {
1149 	if (hard == NULL || soft == NULL)
1150 		return (0);
1151 
1152 	if (hard->sadb_lifetime_allocations != 0 &&
1153 	    soft->sadb_lifetime_allocations != 0 &&
1154 	    hard->sadb_lifetime_allocations < soft->sadb_lifetime_allocations)
1155 		return (SADB_X_DIAGNOSTIC_ALLOC_HSERR);
1156 
1157 	if (hard->sadb_lifetime_bytes != 0 &&
1158 	    soft->sadb_lifetime_bytes != 0 &&
1159 	    hard->sadb_lifetime_bytes < soft->sadb_lifetime_bytes)
1160 		return (SADB_X_DIAGNOSTIC_BYTES_HSERR);
1161 
1162 	if (hard->sadb_lifetime_addtime != 0 &&
1163 	    soft->sadb_lifetime_addtime != 0 &&
1164 	    hard->sadb_lifetime_addtime < soft->sadb_lifetime_addtime)
1165 		return (SADB_X_DIAGNOSTIC_ADDTIME_HSERR);
1166 
1167 	if (hard->sadb_lifetime_usetime != 0 &&
1168 	    soft->sadb_lifetime_usetime != 0 &&
1169 	    hard->sadb_lifetime_usetime < soft->sadb_lifetime_usetime)
1170 		return (SADB_X_DIAGNOSTIC_USETIME_HSERR);
1171 
1172 	if (idle != NULL) {
1173 		if (hard->sadb_lifetime_addtime != 0 &&
1174 		    idle->sadb_lifetime_addtime != 0 &&
1175 		    hard->sadb_lifetime_addtime < idle->sadb_lifetime_addtime)
1176 			return (SADB_X_DIAGNOSTIC_ADDTIME_HSERR);
1177 
1178 		if (soft->sadb_lifetime_addtime != 0 &&
1179 		    idle->sadb_lifetime_addtime != 0 &&
1180 		    soft->sadb_lifetime_addtime < idle->sadb_lifetime_addtime)
1181 			return (SADB_X_DIAGNOSTIC_ADDTIME_HSERR);
1182 
1183 		if (hard->sadb_lifetime_usetime != 0 &&
1184 		    idle->sadb_lifetime_usetime != 0 &&
1185 		    hard->sadb_lifetime_usetime < idle->sadb_lifetime_usetime)
1186 			return (SADB_X_DIAGNOSTIC_USETIME_HSERR);
1187 
1188 		if (soft->sadb_lifetime_usetime != 0 &&
1189 		    idle->sadb_lifetime_usetime != 0 &&
1190 		    soft->sadb_lifetime_usetime < idle->sadb_lifetime_usetime)
1191 			return (SADB_X_DIAGNOSTIC_USETIME_HSERR);
1192 	}
1193 
1194 	return (0);
1195 }
1196 
1197 /*
1198  * Clone a security association for the purposes of inserting a single SA
1199  * into inbound and outbound tables respectively. This function should only
1200  * be called from sadb_common_add().
1201  */
1202 static ipsa_t *
1203 sadb_cloneassoc(ipsa_t *ipsa)
1204 {
1205 	ipsa_t *newbie;
1206 	boolean_t error = B_FALSE;
1207 
1208 	ASSERT(MUTEX_NOT_HELD(&(ipsa->ipsa_lock)));
1209 
1210 	newbie = kmem_alloc(sizeof (ipsa_t), KM_NOSLEEP);
1211 	if (newbie == NULL)
1212 		return (NULL);
1213 
1214 	/* Copy over what we can. */
1215 	*newbie = *ipsa;
1216 
1217 	/* bzero and initialize locks, in case *_init() allocates... */
1218 	mutex_init(&newbie->ipsa_lock, NULL, MUTEX_DEFAULT, NULL);
1219 
1220 	/*
1221 	 * While somewhat dain-bramaged, the most graceful way to
1222 	 * recover from errors is to keep plowing through the
1223 	 * allocations, and getting what I can.  It's easier to call
1224 	 * sadb_freeassoc() on the stillborn clone when all the
1225 	 * pointers aren't pointing to the parent's data.
1226 	 */
1227 
1228 	if (ipsa->ipsa_authkey != NULL) {
1229 		newbie->ipsa_authkey = kmem_alloc(newbie->ipsa_authkeylen,
1230 		    KM_NOSLEEP);
1231 		if (newbie->ipsa_authkey == NULL) {
1232 			error = B_TRUE;
1233 		} else {
1234 			bcopy(ipsa->ipsa_authkey, newbie->ipsa_authkey,
1235 			    newbie->ipsa_authkeylen);
1236 
1237 			newbie->ipsa_kcfauthkey.ck_data =
1238 			    newbie->ipsa_authkey;
1239 		}
1240 
1241 		if (newbie->ipsa_amech.cm_param != NULL) {
1242 			newbie->ipsa_amech.cm_param =
1243 			    (char *)&newbie->ipsa_mac_len;
1244 		}
1245 	}
1246 
1247 	if (ipsa->ipsa_encrkey != NULL) {
1248 		newbie->ipsa_encrkey = kmem_alloc(newbie->ipsa_encrkeylen,
1249 		    KM_NOSLEEP);
1250 		if (newbie->ipsa_encrkey == NULL) {
1251 			error = B_TRUE;
1252 		} else {
1253 			bcopy(ipsa->ipsa_encrkey, newbie->ipsa_encrkey,
1254 			    newbie->ipsa_encrkeylen);
1255 
1256 			newbie->ipsa_kcfencrkey.ck_data =
1257 			    newbie->ipsa_encrkey;
1258 		}
1259 	}
1260 
1261 	newbie->ipsa_authtmpl = NULL;
1262 	newbie->ipsa_encrtmpl = NULL;
1263 	newbie->ipsa_haspeer = B_TRUE;
1264 
1265 	if (ipsa->ipsa_integ != NULL) {
1266 		newbie->ipsa_integ = kmem_alloc(newbie->ipsa_integlen,
1267 		    KM_NOSLEEP);
1268 		if (newbie->ipsa_integ == NULL) {
1269 			error = B_TRUE;
1270 		} else {
1271 			bcopy(ipsa->ipsa_integ, newbie->ipsa_integ,
1272 			    newbie->ipsa_integlen);
1273 		}
1274 	}
1275 
1276 	if (ipsa->ipsa_sens != NULL) {
1277 		newbie->ipsa_sens = kmem_alloc(newbie->ipsa_senslen,
1278 		    KM_NOSLEEP);
1279 		if (newbie->ipsa_sens == NULL) {
1280 			error = B_TRUE;
1281 		} else {
1282 			bcopy(ipsa->ipsa_sens, newbie->ipsa_sens,
1283 			    newbie->ipsa_senslen);
1284 		}
1285 	}
1286 
1287 	if (ipsa->ipsa_src_cid != NULL) {
1288 		newbie->ipsa_src_cid = ipsa->ipsa_src_cid;
1289 		IPSID_REFHOLD(ipsa->ipsa_src_cid);
1290 	}
1291 
1292 	if (ipsa->ipsa_dst_cid != NULL) {
1293 		newbie->ipsa_dst_cid = ipsa->ipsa_dst_cid;
1294 		IPSID_REFHOLD(ipsa->ipsa_dst_cid);
1295 	}
1296 
1297 	if (error) {
1298 		sadb_freeassoc(newbie);
1299 		return (NULL);
1300 	}
1301 
1302 	return (newbie);
1303 }
1304 
1305 /*
1306  * Initialize a SADB address extension at the address specified by addrext.
1307  * Return a pointer to the end of the new address extension.
1308  */
1309 static uint8_t *
1310 sadb_make_addr_ext(uint8_t *start, uint8_t *end, uint16_t exttype,
1311     sa_family_t af, uint32_t *addr, uint16_t port, uint8_t proto, int prefix)
1312 {
1313 	struct sockaddr_in *sin;
1314 	struct sockaddr_in6 *sin6;
1315 	uint8_t *cur = start;
1316 	int addrext_len;
1317 	int sin_len;
1318 	sadb_address_t *addrext	= (sadb_address_t *)cur;
1319 
1320 	if (cur == NULL)
1321 		return (NULL);
1322 
1323 	cur += sizeof (*addrext);
1324 	if (cur > end)
1325 		return (NULL);
1326 
1327 	addrext->sadb_address_proto = proto;
1328 	addrext->sadb_address_prefixlen = prefix;
1329 	addrext->sadb_address_reserved = 0;
1330 	addrext->sadb_address_exttype = exttype;
1331 
1332 	switch (af) {
1333 	case AF_INET:
1334 		sin = (struct sockaddr_in *)cur;
1335 		sin_len = sizeof (*sin);
1336 		cur += sin_len;
1337 		if (cur > end)
1338 			return (NULL);
1339 
1340 		sin->sin_family = af;
1341 		bzero(sin->sin_zero, sizeof (sin->sin_zero));
1342 		sin->sin_port = port;
1343 		IPSA_COPY_ADDR(&sin->sin_addr, addr, af);
1344 		break;
1345 	case AF_INET6:
1346 		sin6 = (struct sockaddr_in6 *)cur;
1347 		sin_len = sizeof (*sin6);
1348 		cur += sin_len;
1349 		if (cur > end)
1350 			return (NULL);
1351 
1352 		bzero(sin6, sizeof (*sin6));
1353 		sin6->sin6_family = af;
1354 		sin6->sin6_port = port;
1355 		IPSA_COPY_ADDR(&sin6->sin6_addr, addr, af);
1356 		break;
1357 	}
1358 
1359 	addrext_len = roundup(cur - start, sizeof (uint64_t));
1360 	addrext->sadb_address_len = SADB_8TO64(addrext_len);
1361 
1362 	cur = start + addrext_len;
1363 	if (cur > end)
1364 		cur = NULL;
1365 
1366 	return (cur);
1367 }
1368 
1369 /*
1370  * Construct a key management cookie extension.
1371  */
1372 
1373 static uint8_t *
1374 sadb_make_kmc_ext(uint8_t *cur, uint8_t *end, uint32_t kmp, uint32_t kmc)
1375 {
1376 	sadb_x_kmc_t *kmcext = (sadb_x_kmc_t *)cur;
1377 
1378 	if (cur == NULL)
1379 		return (NULL);
1380 
1381 	cur += sizeof (*kmcext);
1382 
1383 	if (cur > end)
1384 		return (NULL);
1385 
1386 	kmcext->sadb_x_kmc_len = SADB_8TO64(sizeof (*kmcext));
1387 	kmcext->sadb_x_kmc_exttype = SADB_X_EXT_KM_COOKIE;
1388 	kmcext->sadb_x_kmc_proto = kmp;
1389 	kmcext->sadb_x_kmc_cookie = kmc;
1390 	kmcext->sadb_x_kmc_reserved = 0;
1391 
1392 	return (cur);
1393 }
1394 
1395 /*
1396  * Given an original message header with sufficient space following it, and an
1397  * SA, construct a full PF_KEY message with all of the relevant extensions.
1398  * This is mostly used for SADB_GET, and SADB_DUMP.
1399  */
1400 static mblk_t *
1401 sadb_sa2msg(ipsa_t *ipsa, sadb_msg_t *samsg)
1402 {
1403 	int alloclen, addrsize, paddrsize, authsize, encrsize;
1404 	int srcidsize, dstidsize;
1405 	sa_family_t fam, pfam;	/* Address family for SADB_EXT_ADDRESS */
1406 				/* src/dst and proxy sockaddrs. */
1407 	/*
1408 	 * The following are pointers into the PF_KEY message this PF_KEY
1409 	 * message creates.
1410 	 */
1411 	sadb_msg_t *newsamsg;
1412 	sadb_sa_t *assoc;
1413 	sadb_lifetime_t *lt;
1414 	sadb_key_t *key;
1415 	sadb_ident_t *ident;
1416 	sadb_sens_t *sens;
1417 	sadb_ext_t *walker;	/* For when we need a generic ext. pointer. */
1418 	sadb_x_replay_ctr_t *repl_ctr;
1419 	sadb_x_pair_t *pair_ext;
1420 
1421 	mblk_t *mp;
1422 	uint64_t *bitmap;
1423 	uint8_t *cur, *end;
1424 	/* These indicate the presence of the above extension fields. */
1425 	boolean_t soft, hard, isrc, idst, auth, encr, sensinteg, srcid, dstid;
1426 	boolean_t idle;
1427 	boolean_t paired;
1428 	uint32_t otherspi;
1429 
1430 	/* First off, figure out the allocation length for this message. */
1431 
1432 	/*
1433 	 * Constant stuff.  This includes base, SA, address (src, dst),
1434 	 * and lifetime (current).
1435 	 */
1436 	alloclen = sizeof (sadb_msg_t) + sizeof (sadb_sa_t) +
1437 	    sizeof (sadb_lifetime_t);
1438 
1439 	fam = ipsa->ipsa_addrfam;
1440 	switch (fam) {
1441 	case AF_INET:
1442 		addrsize = roundup(sizeof (struct sockaddr_in) +
1443 		    sizeof (sadb_address_t), sizeof (uint64_t));
1444 		break;
1445 	case AF_INET6:
1446 		addrsize = roundup(sizeof (struct sockaddr_in6) +
1447 		    sizeof (sadb_address_t), sizeof (uint64_t));
1448 		break;
1449 	default:
1450 		return (NULL);
1451 	}
1452 	/*
1453 	 * Allocate TWO address extensions, for source and destination.
1454 	 * (Thus, the * 2.)
1455 	 */
1456 	alloclen += addrsize * 2;
1457 	if (ipsa->ipsa_flags & IPSA_F_NATT_REM)
1458 		alloclen += addrsize;
1459 	if (ipsa->ipsa_flags & IPSA_F_NATT_LOC)
1460 		alloclen += addrsize;
1461 
1462 	if (ipsa->ipsa_flags & IPSA_F_PAIRED) {
1463 		paired = B_TRUE;
1464 		alloclen += sizeof (sadb_x_pair_t);
1465 		otherspi = ipsa->ipsa_otherspi;
1466 	} else {
1467 		paired = B_FALSE;
1468 	}
1469 
1470 	/* How 'bout other lifetimes? */
1471 	if (ipsa->ipsa_softaddlt != 0 || ipsa->ipsa_softuselt != 0 ||
1472 	    ipsa->ipsa_softbyteslt != 0 || ipsa->ipsa_softalloc != 0) {
1473 		alloclen += sizeof (sadb_lifetime_t);
1474 		soft = B_TRUE;
1475 	} else {
1476 		soft = B_FALSE;
1477 	}
1478 
1479 	if (ipsa->ipsa_hardaddlt != 0 || ipsa->ipsa_harduselt != 0 ||
1480 	    ipsa->ipsa_hardbyteslt != 0 || ipsa->ipsa_hardalloc != 0) {
1481 		alloclen += sizeof (sadb_lifetime_t);
1482 		hard = B_TRUE;
1483 	} else {
1484 		hard = B_FALSE;
1485 	}
1486 
1487 	if (ipsa->ipsa_idleaddlt != 0 || ipsa->ipsa_idleuselt != 0) {
1488 		alloclen += sizeof (sadb_lifetime_t);
1489 		idle = B_TRUE;
1490 	} else {
1491 		idle = B_FALSE;
1492 	}
1493 
1494 	/* Inner addresses. */
1495 	if (ipsa->ipsa_innerfam == 0) {
1496 		isrc = B_FALSE;
1497 		idst = B_FALSE;
1498 	} else {
1499 		pfam = ipsa->ipsa_innerfam;
1500 		switch (pfam) {
1501 		case AF_INET6:
1502 			paddrsize = roundup(sizeof (struct sockaddr_in6) +
1503 			    sizeof (sadb_address_t), sizeof (uint64_t));
1504 			break;
1505 		case AF_INET:
1506 			paddrsize = roundup(sizeof (struct sockaddr_in) +
1507 			    sizeof (sadb_address_t), sizeof (uint64_t));
1508 			break;
1509 		default:
1510 			cmn_err(CE_PANIC,
1511 			    "IPsec SADB: Proxy length failure.\n");
1512 			break;
1513 		}
1514 		isrc = B_TRUE;
1515 		idst = B_TRUE;
1516 		alloclen += 2 * paddrsize;
1517 	}
1518 
1519 	/* For the following fields, assume that length != 0 ==> stuff */
1520 	if (ipsa->ipsa_authkeylen != 0) {
1521 		authsize = roundup(sizeof (sadb_key_t) + ipsa->ipsa_authkeylen,
1522 		    sizeof (uint64_t));
1523 		alloclen += authsize;
1524 		auth = B_TRUE;
1525 	} else {
1526 		auth = B_FALSE;
1527 	}
1528 
1529 	if (ipsa->ipsa_encrkeylen != 0) {
1530 		encrsize = roundup(sizeof (sadb_key_t) + ipsa->ipsa_encrkeylen,
1531 		    sizeof (uint64_t));
1532 		alloclen += encrsize;
1533 		encr = B_TRUE;
1534 	} else {
1535 		encr = B_FALSE;
1536 	}
1537 
1538 	/* No need for roundup on sens and integ. */
1539 	if (ipsa->ipsa_integlen != 0 || ipsa->ipsa_senslen != 0) {
1540 		alloclen += sizeof (sadb_key_t) + ipsa->ipsa_integlen +
1541 		    ipsa->ipsa_senslen;
1542 		sensinteg = B_TRUE;
1543 	} else {
1544 		sensinteg = B_FALSE;
1545 	}
1546 
1547 	/*
1548 	 * Must use strlen() here for lengths.	Identities use NULL
1549 	 * pointers to indicate their nonexistence.
1550 	 */
1551 	if (ipsa->ipsa_src_cid != NULL) {
1552 		srcidsize = roundup(sizeof (sadb_ident_t) +
1553 		    strlen(ipsa->ipsa_src_cid->ipsid_cid) + 1,
1554 		    sizeof (uint64_t));
1555 		alloclen += srcidsize;
1556 		srcid = B_TRUE;
1557 	} else {
1558 		srcid = B_FALSE;
1559 	}
1560 
1561 	if (ipsa->ipsa_dst_cid != NULL) {
1562 		dstidsize = roundup(sizeof (sadb_ident_t) +
1563 		    strlen(ipsa->ipsa_dst_cid->ipsid_cid) + 1,
1564 		    sizeof (uint64_t));
1565 		alloclen += dstidsize;
1566 		dstid = B_TRUE;
1567 	} else {
1568 		dstid = B_FALSE;
1569 	}
1570 
1571 	if ((ipsa->ipsa_kmp != 0) || (ipsa->ipsa_kmc != 0))
1572 		alloclen += sizeof (sadb_x_kmc_t);
1573 
1574 	if (ipsa->ipsa_replay != 0) {
1575 		alloclen += sizeof (sadb_x_replay_ctr_t);
1576 	}
1577 
1578 	/* Make sure the allocation length is a multiple of 8 bytes. */
1579 	ASSERT((alloclen & 0x7) == 0);
1580 
1581 	/* XXX Possibly make it esballoc, with a bzero-ing free_ftn. */
1582 	mp = allocb(alloclen, BPRI_HI);
1583 	if (mp == NULL)
1584 		return (NULL);
1585 
1586 	mp->b_wptr += alloclen;
1587 	end = mp->b_wptr;
1588 	newsamsg = (sadb_msg_t *)mp->b_rptr;
1589 	*newsamsg = *samsg;
1590 	newsamsg->sadb_msg_len = (uint16_t)SADB_8TO64(alloclen);
1591 
1592 	mutex_enter(&ipsa->ipsa_lock);	/* Since I'm grabbing SA fields... */
1593 
1594 	newsamsg->sadb_msg_satype = ipsa->ipsa_type;
1595 
1596 	assoc = (sadb_sa_t *)(newsamsg + 1);
1597 	assoc->sadb_sa_len = SADB_8TO64(sizeof (*assoc));
1598 	assoc->sadb_sa_exttype = SADB_EXT_SA;
1599 	assoc->sadb_sa_spi = ipsa->ipsa_spi;
1600 	assoc->sadb_sa_replay = ipsa->ipsa_replay_wsize;
1601 	assoc->sadb_sa_state = ipsa->ipsa_state;
1602 	assoc->sadb_sa_auth = ipsa->ipsa_auth_alg;
1603 	assoc->sadb_sa_encrypt = ipsa->ipsa_encr_alg;
1604 	assoc->sadb_sa_flags = ipsa->ipsa_flags;
1605 
1606 	lt = (sadb_lifetime_t *)(assoc + 1);
1607 	lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
1608 	lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT;
1609 	/* We do not support the concept. */
1610 	lt->sadb_lifetime_allocations = 0;
1611 	lt->sadb_lifetime_bytes = ipsa->ipsa_bytes;
1612 	lt->sadb_lifetime_addtime = ipsa->ipsa_addtime;
1613 	lt->sadb_lifetime_usetime = ipsa->ipsa_usetime;
1614 
1615 	if (hard) {
1616 		lt++;
1617 		lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
1618 		lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_HARD;
1619 		lt->sadb_lifetime_allocations = ipsa->ipsa_hardalloc;
1620 		lt->sadb_lifetime_bytes = ipsa->ipsa_hardbyteslt;
1621 		lt->sadb_lifetime_addtime = ipsa->ipsa_hardaddlt;
1622 		lt->sadb_lifetime_usetime = ipsa->ipsa_harduselt;
1623 	}
1624 
1625 	if (soft) {
1626 		lt++;
1627 		lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
1628 		lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_SOFT;
1629 		lt->sadb_lifetime_allocations = ipsa->ipsa_softalloc;
1630 		lt->sadb_lifetime_bytes = ipsa->ipsa_softbyteslt;
1631 		lt->sadb_lifetime_addtime = ipsa->ipsa_softaddlt;
1632 		lt->sadb_lifetime_usetime = ipsa->ipsa_softuselt;
1633 	}
1634 
1635 	if (idle) {
1636 		lt++;
1637 		lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
1638 		lt->sadb_lifetime_exttype = SADB_X_EXT_LIFETIME_IDLE;
1639 		lt->sadb_lifetime_addtime = ipsa->ipsa_idleaddlt;
1640 		lt->sadb_lifetime_usetime = ipsa->ipsa_idleuselt;
1641 	}
1642 
1643 	cur = (uint8_t *)(lt + 1);
1644 
1645 	/* NOTE:  Don't fill in ports here if we are a tunnel-mode SA. */
1646 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_SRC, fam,
1647 	    ipsa->ipsa_srcaddr, (!isrc && !idst) ? SA_SRCPORT(ipsa) : 0,
1648 	    SA_PROTO(ipsa), 0);
1649 	if (cur == NULL) {
1650 		freemsg(mp);
1651 		mp = NULL;
1652 		goto bail;
1653 	}
1654 
1655 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_DST, fam,
1656 	    ipsa->ipsa_dstaddr, (!isrc && !idst) ? SA_DSTPORT(ipsa) : 0,
1657 	    SA_PROTO(ipsa), 0);
1658 	if (cur == NULL) {
1659 		freemsg(mp);
1660 		mp = NULL;
1661 		goto bail;
1662 	}
1663 
1664 	if (ipsa->ipsa_flags & IPSA_F_NATT_LOC) {
1665 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_NATT_LOC,
1666 		    fam, &ipsa->ipsa_natt_addr_loc, ipsa->ipsa_local_nat_port,
1667 		    IPPROTO_UDP, 0);
1668 		if (cur == NULL) {
1669 			freemsg(mp);
1670 			mp = NULL;
1671 			goto bail;
1672 		}
1673 	}
1674 
1675 	if (ipsa->ipsa_flags & IPSA_F_NATT_REM) {
1676 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_NATT_REM,
1677 		    fam, &ipsa->ipsa_natt_addr_rem, ipsa->ipsa_remote_nat_port,
1678 		    IPPROTO_UDP, 0);
1679 		if (cur == NULL) {
1680 			freemsg(mp);
1681 			mp = NULL;
1682 			goto bail;
1683 		}
1684 	}
1685 
1686 	/* If we are a tunnel-mode SA, fill in the inner-selectors. */
1687 	if (isrc) {
1688 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_SRC,
1689 		    pfam, ipsa->ipsa_innersrc, SA_SRCPORT(ipsa),
1690 		    SA_IPROTO(ipsa), ipsa->ipsa_innersrcpfx);
1691 		if (cur == NULL) {
1692 			freemsg(mp);
1693 			mp = NULL;
1694 			goto bail;
1695 		}
1696 	}
1697 
1698 	if (idst) {
1699 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_DST,
1700 		    pfam, ipsa->ipsa_innerdst, SA_DSTPORT(ipsa),
1701 		    SA_IPROTO(ipsa), ipsa->ipsa_innerdstpfx);
1702 		if (cur == NULL) {
1703 			freemsg(mp);
1704 			mp = NULL;
1705 			goto bail;
1706 		}
1707 	}
1708 
1709 	if ((ipsa->ipsa_kmp != 0) || (ipsa->ipsa_kmc != 0)) {
1710 		cur = sadb_make_kmc_ext(cur, end,
1711 		    ipsa->ipsa_kmp, ipsa->ipsa_kmc);
1712 		if (cur == NULL) {
1713 			freemsg(mp);
1714 			mp = NULL;
1715 			goto bail;
1716 		}
1717 	}
1718 
1719 	walker = (sadb_ext_t *)cur;
1720 	if (auth) {
1721 		key = (sadb_key_t *)walker;
1722 		key->sadb_key_len = SADB_8TO64(authsize);
1723 		key->sadb_key_exttype = SADB_EXT_KEY_AUTH;
1724 		key->sadb_key_bits = ipsa->ipsa_authkeybits;
1725 		key->sadb_key_reserved = 0;
1726 		bcopy(ipsa->ipsa_authkey, key + 1, ipsa->ipsa_authkeylen);
1727 		walker = (sadb_ext_t *)((uint64_t *)walker +
1728 		    walker->sadb_ext_len);
1729 	}
1730 
1731 	if (encr) {
1732 		key = (sadb_key_t *)walker;
1733 		key->sadb_key_len = SADB_8TO64(encrsize);
1734 		key->sadb_key_exttype = SADB_EXT_KEY_ENCRYPT;
1735 		key->sadb_key_bits = ipsa->ipsa_encrkeybits;
1736 		key->sadb_key_reserved = 0;
1737 		bcopy(ipsa->ipsa_encrkey, key + 1, ipsa->ipsa_encrkeylen);
1738 		walker = (sadb_ext_t *)((uint64_t *)walker +
1739 		    walker->sadb_ext_len);
1740 	}
1741 
1742 	if (srcid) {
1743 		ident = (sadb_ident_t *)walker;
1744 		ident->sadb_ident_len = SADB_8TO64(srcidsize);
1745 		ident->sadb_ident_exttype = SADB_EXT_IDENTITY_SRC;
1746 		ident->sadb_ident_type = ipsa->ipsa_src_cid->ipsid_type;
1747 		ident->sadb_ident_id = 0;
1748 		ident->sadb_ident_reserved = 0;
1749 		(void) strcpy((char *)(ident + 1),
1750 		    ipsa->ipsa_src_cid->ipsid_cid);
1751 		walker = (sadb_ext_t *)((uint64_t *)walker +
1752 		    walker->sadb_ext_len);
1753 	}
1754 
1755 	if (dstid) {
1756 		ident = (sadb_ident_t *)walker;
1757 		ident->sadb_ident_len = SADB_8TO64(dstidsize);
1758 		ident->sadb_ident_exttype = SADB_EXT_IDENTITY_DST;
1759 		ident->sadb_ident_type = ipsa->ipsa_dst_cid->ipsid_type;
1760 		ident->sadb_ident_id = 0;
1761 		ident->sadb_ident_reserved = 0;
1762 		(void) strcpy((char *)(ident + 1),
1763 		    ipsa->ipsa_dst_cid->ipsid_cid);
1764 		walker = (sadb_ext_t *)((uint64_t *)walker +
1765 		    walker->sadb_ext_len);
1766 	}
1767 
1768 	if (sensinteg) {
1769 		sens = (sadb_sens_t *)walker;
1770 		sens->sadb_sens_len = SADB_8TO64(sizeof (sadb_sens_t *) +
1771 		    ipsa->ipsa_senslen + ipsa->ipsa_integlen);
1772 		sens->sadb_sens_dpd = ipsa->ipsa_dpd;
1773 		sens->sadb_sens_sens_level = ipsa->ipsa_senslevel;
1774 		sens->sadb_sens_integ_level = ipsa->ipsa_integlevel;
1775 		sens->sadb_sens_sens_len = SADB_8TO64(ipsa->ipsa_senslen);
1776 		sens->sadb_sens_integ_len = SADB_8TO64(ipsa->ipsa_integlen);
1777 		sens->sadb_sens_reserved = 0;
1778 		bitmap = (uint64_t *)(sens + 1);
1779 		if (ipsa->ipsa_sens != NULL) {
1780 			bcopy(ipsa->ipsa_sens, bitmap, ipsa->ipsa_senslen);
1781 			bitmap += sens->sadb_sens_sens_len;
1782 		}
1783 		if (ipsa->ipsa_integ != NULL)
1784 			bcopy(ipsa->ipsa_integ, bitmap, ipsa->ipsa_integlen);
1785 		walker = (sadb_ext_t *)((uint64_t *)walker +
1786 		    walker->sadb_ext_len);
1787 	}
1788 
1789 	if (paired) {
1790 		pair_ext = (sadb_x_pair_t *)walker;
1791 
1792 		pair_ext->sadb_x_pair_len = SADB_8TO64(sizeof (sadb_x_pair_t));
1793 		pair_ext->sadb_x_pair_exttype = SADB_X_EXT_PAIR;
1794 		pair_ext->sadb_x_pair_spi = otherspi;
1795 
1796 		walker = (sadb_ext_t *)((uint64_t *)walker +
1797 		    walker->sadb_ext_len);
1798 	}
1799 
1800 	if (ipsa->ipsa_replay != 0) {
1801 		repl_ctr = (sadb_x_replay_ctr_t *)walker;
1802 		repl_ctr->sadb_x_rc_len = SADB_8TO64(sizeof (*repl_ctr));
1803 		repl_ctr->sadb_x_rc_exttype = SADB_X_EXT_REPLAY_VALUE;
1804 		repl_ctr->sadb_x_rc_replay32 = ipsa->ipsa_replay;
1805 		repl_ctr->sadb_x_rc_replay64 = 0;
1806 		walker = (sadb_ext_t *)(repl_ctr + 1);
1807 	}
1808 
1809 bail:
1810 	/* Pardon any delays... */
1811 	mutex_exit(&ipsa->ipsa_lock);
1812 
1813 	return (mp);
1814 }
1815 
1816 /*
1817  * Strip out key headers or unmarked headers (SADB_EXT_KEY_*, SADB_EXT_UNKNOWN)
1818  * and adjust base message accordingly.
1819  *
1820  * Assume message is pulled up in one piece of contiguous memory.
1821  *
1822  * Say if we start off with:
1823  *
1824  * +------+----+-------------+-----------+---------------+---------------+
1825  * | base | SA | source addr | dest addr | rsrvd. or key | soft lifetime |
1826  * +------+----+-------------+-----------+---------------+---------------+
1827  *
1828  * we will end up with
1829  *
1830  * +------+----+-------------+-----------+---------------+
1831  * | base | SA | source addr | dest addr | soft lifetime |
1832  * +------+----+-------------+-----------+---------------+
1833  */
1834 static void
1835 sadb_strip(sadb_msg_t *samsg)
1836 {
1837 	sadb_ext_t *ext;
1838 	uint8_t *target = NULL;
1839 	uint8_t *msgend;
1840 	int sofar = SADB_8TO64(sizeof (*samsg));
1841 	int copylen;
1842 
1843 	ext = (sadb_ext_t *)(samsg + 1);
1844 	msgend = (uint8_t *)samsg;
1845 	msgend += SADB_64TO8(samsg->sadb_msg_len);
1846 	while ((uint8_t *)ext < msgend) {
1847 		if (ext->sadb_ext_type == SADB_EXT_RESERVED ||
1848 		    ext->sadb_ext_type == SADB_EXT_KEY_AUTH ||
1849 		    ext->sadb_ext_type == SADB_X_EXT_EDUMP ||
1850 		    ext->sadb_ext_type == SADB_EXT_KEY_ENCRYPT) {
1851 			/*
1852 			 * Aha!	 I found a header to be erased.
1853 			 */
1854 
1855 			if (target != NULL) {
1856 				/*
1857 				 * If I had a previous header to be erased,
1858 				 * copy over it.  I can get away with just
1859 				 * copying backwards because the target will
1860 				 * always be 8 bytes behind the source.
1861 				 */
1862 				copylen = ((uint8_t *)ext) - (target +
1863 				    SADB_64TO8(
1864 				    ((sadb_ext_t *)target)->sadb_ext_len));
1865 				ovbcopy(((uint8_t *)ext - copylen), target,
1866 				    copylen);
1867 				target += copylen;
1868 				((sadb_ext_t *)target)->sadb_ext_len =
1869 				    SADB_8TO64(((uint8_t *)ext) - target +
1870 				    SADB_64TO8(ext->sadb_ext_len));
1871 			} else {
1872 				target = (uint8_t *)ext;
1873 			}
1874 		} else {
1875 			sofar += ext->sadb_ext_len;
1876 		}
1877 
1878 		ext = (sadb_ext_t *)(((uint64_t *)ext) + ext->sadb_ext_len);
1879 	}
1880 
1881 	ASSERT((uint8_t *)ext == msgend);
1882 
1883 	if (target != NULL) {
1884 		copylen = ((uint8_t *)ext) - (target +
1885 		    SADB_64TO8(((sadb_ext_t *)target)->sadb_ext_len));
1886 		if (copylen != 0)
1887 			ovbcopy(((uint8_t *)ext - copylen), target, copylen);
1888 	}
1889 
1890 	/* Adjust samsg. */
1891 	samsg->sadb_msg_len = (uint16_t)sofar;
1892 
1893 	/* Assume all of the rest is cleared by caller in sadb_pfkey_echo(). */
1894 }
1895 
1896 /*
1897  * AH needs to send an error to PF_KEY.	 Assume mp points to an M_CTL
1898  * followed by an M_DATA with a PF_KEY message in it.  The serial of
1899  * the sending keysock instance is included.
1900  */
1901 void
1902 sadb_pfkey_error(queue_t *pfkey_q, mblk_t *mp, int error, int diagnostic,
1903     uint_t serial)
1904 {
1905 	mblk_t *msg = mp->b_cont;
1906 	sadb_msg_t *samsg;
1907 	keysock_out_t *kso;
1908 
1909 	/*
1910 	 * Enough functions call this to merit a NULL queue check.
1911 	 */
1912 	if (pfkey_q == NULL) {
1913 		freemsg(mp);
1914 		return;
1915 	}
1916 
1917 	ASSERT(msg != NULL);
1918 	ASSERT((mp->b_wptr - mp->b_rptr) == sizeof (ipsec_info_t));
1919 	ASSERT((msg->b_wptr - msg->b_rptr) >= sizeof (sadb_msg_t));
1920 	samsg = (sadb_msg_t *)msg->b_rptr;
1921 	kso = (keysock_out_t *)mp->b_rptr;
1922 
1923 	kso->ks_out_type = KEYSOCK_OUT;
1924 	kso->ks_out_len = sizeof (*kso);
1925 	kso->ks_out_serial = serial;
1926 
1927 	/*
1928 	 * Only send the base message up in the event of an error.
1929 	 * Don't worry about bzero()-ing, because it was probably bogus
1930 	 * anyway.
1931 	 */
1932 	msg->b_wptr = msg->b_rptr + sizeof (*samsg);
1933 	samsg = (sadb_msg_t *)msg->b_rptr;
1934 	samsg->sadb_msg_len = SADB_8TO64(sizeof (*samsg));
1935 	samsg->sadb_msg_errno = (uint8_t)error;
1936 	if (diagnostic != SADB_X_DIAGNOSTIC_PRESET)
1937 		samsg->sadb_x_msg_diagnostic = (uint16_t)diagnostic;
1938 
1939 	putnext(pfkey_q, mp);
1940 }
1941 
1942 /*
1943  * Send a successful return packet back to keysock via the queue in pfkey_q.
1944  *
1945  * Often, an SA is associated with the reply message, it's passed in if needed,
1946  * and NULL if not.  BTW, that ipsa will have its refcnt appropriately held,
1947  * and the caller will release said refcnt.
1948  */
1949 void
1950 sadb_pfkey_echo(queue_t *pfkey_q, mblk_t *mp, sadb_msg_t *samsg,
1951     keysock_in_t *ksi, ipsa_t *ipsa)
1952 {
1953 	keysock_out_t *kso;
1954 	mblk_t *mp1;
1955 	sadb_msg_t *newsamsg;
1956 	uint8_t *oldend;
1957 
1958 	ASSERT((mp->b_cont != NULL) &&
1959 	    ((void *)samsg == (void *)mp->b_cont->b_rptr) &&
1960 	    ((void *)mp->b_rptr == (void *)ksi));
1961 
1962 	switch (samsg->sadb_msg_type) {
1963 	case SADB_ADD:
1964 	case SADB_UPDATE:
1965 	case SADB_X_UPDATEPAIR:
1966 	case SADB_X_DELPAIR_STATE:
1967 	case SADB_FLUSH:
1968 	case SADB_DUMP:
1969 		/*
1970 		 * I have all of the message already.  I just need to strip
1971 		 * out the keying material and echo the message back.
1972 		 *
1973 		 * NOTE: for SADB_DUMP, the function sadb_dump() did the
1974 		 * work.  When DUMP reaches here, it should only be a base
1975 		 * message.
1976 		 */
1977 	justecho:
1978 		if (ksi->ks_in_extv[SADB_EXT_KEY_AUTH] != NULL ||
1979 		    ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT] != NULL ||
1980 		    ksi->ks_in_extv[SADB_X_EXT_EDUMP] != NULL) {
1981 			sadb_strip(samsg);
1982 			/* Assume PF_KEY message is contiguous. */
1983 			ASSERT(mp->b_cont->b_cont == NULL);
1984 			oldend = mp->b_cont->b_wptr;
1985 			mp->b_cont->b_wptr = mp->b_cont->b_rptr +
1986 			    SADB_64TO8(samsg->sadb_msg_len);
1987 			bzero(mp->b_cont->b_wptr, oldend - mp->b_cont->b_wptr);
1988 		}
1989 		break;
1990 	case SADB_GET:
1991 		/*
1992 		 * Do a lot of work here, because of the ipsa I just found.
1993 		 * First construct the new PF_KEY message, then abandon
1994 		 * the old one.
1995 		 */
1996 		mp1 = sadb_sa2msg(ipsa, samsg);
1997 		if (mp1 == NULL) {
1998 			sadb_pfkey_error(pfkey_q, mp, ENOMEM,
1999 			    SADB_X_DIAGNOSTIC_NONE, ksi->ks_in_serial);
2000 			return;
2001 		}
2002 		freemsg(mp->b_cont);
2003 		mp->b_cont = mp1;
2004 		break;
2005 	case SADB_DELETE:
2006 	case SADB_X_DELPAIR:
2007 		if (ipsa == NULL)
2008 			goto justecho;
2009 		/*
2010 		 * Because listening KMds may require more info, treat
2011 		 * DELETE like a special case of GET.
2012 		 */
2013 		mp1 = sadb_sa2msg(ipsa, samsg);
2014 		if (mp1 == NULL) {
2015 			sadb_pfkey_error(pfkey_q, mp, ENOMEM,
2016 			    SADB_X_DIAGNOSTIC_NONE, ksi->ks_in_serial);
2017 			return;
2018 		}
2019 		newsamsg = (sadb_msg_t *)mp1->b_rptr;
2020 		sadb_strip(newsamsg);
2021 		oldend = mp1->b_wptr;
2022 		mp1->b_wptr = mp1->b_rptr + SADB_64TO8(newsamsg->sadb_msg_len);
2023 		bzero(mp1->b_wptr, oldend - mp1->b_wptr);
2024 		freemsg(mp->b_cont);
2025 		mp->b_cont = mp1;
2026 		break;
2027 	default:
2028 		if (mp != NULL)
2029 			freemsg(mp);
2030 		return;
2031 	}
2032 
2033 	/* ksi is now null and void. */
2034 	kso = (keysock_out_t *)ksi;
2035 	kso->ks_out_type = KEYSOCK_OUT;
2036 	kso->ks_out_len = sizeof (*kso);
2037 	kso->ks_out_serial = ksi->ks_in_serial;
2038 	/* We're ready to send... */
2039 	putnext(pfkey_q, mp);
2040 }
2041 
2042 /*
2043  * Set up a global pfkey_q instance for AH, ESP, or some other consumer.
2044  */
2045 void
2046 sadb_keysock_hello(queue_t **pfkey_qp, queue_t *q, mblk_t *mp,
2047     void (*ager)(void *), void *agerarg, timeout_id_t *top, int satype)
2048 {
2049 	keysock_hello_ack_t *kha;
2050 	queue_t *oldq;
2051 
2052 	ASSERT(OTHERQ(q) != NULL);
2053 
2054 	/*
2055 	 * First, check atomically that I'm the first and only keysock
2056 	 * instance.
2057 	 *
2058 	 * Use OTHERQ(q), because qreply(q, mp) == putnext(OTHERQ(q), mp),
2059 	 * and I want this module to say putnext(*_pfkey_q, mp) for PF_KEY
2060 	 * messages.
2061 	 */
2062 
2063 	oldq = casptr((void **)pfkey_qp, NULL, OTHERQ(q));
2064 	if (oldq != NULL) {
2065 		ASSERT(oldq != q);
2066 		cmn_err(CE_WARN, "Danger!  Multiple keysocks on top of %s.\n",
2067 		    (satype == SADB_SATYPE_ESP)? "ESP" : "AH or other");
2068 		freemsg(mp);
2069 		return;
2070 	}
2071 
2072 	kha = (keysock_hello_ack_t *)mp->b_rptr;
2073 	kha->ks_hello_len = sizeof (keysock_hello_ack_t);
2074 	kha->ks_hello_type = KEYSOCK_HELLO_ACK;
2075 	kha->ks_hello_satype = (uint8_t)satype;
2076 
2077 	/*
2078 	 * If we made it past the casptr, then we have "exclusive" access
2079 	 * to the timeout handle.  Fire it off after the default ager
2080 	 * interval.
2081 	 */
2082 	*top = qtimeout(*pfkey_qp, ager, agerarg,
2083 	    drv_usectohz(SADB_AGE_INTERVAL_DEFAULT * 1000));
2084 
2085 	putnext(*pfkey_qp, mp);
2086 }
2087 
2088 /*
2089  * Normalize IPv4-mapped IPv6 addresses (and prefixes) as appropriate.
2090  *
2091  * Check addresses themselves for wildcard or multicast.
2092  * Check ire table for local/non-local/broadcast.
2093  */
2094 int
2095 sadb_addrcheck(queue_t *pfkey_q, mblk_t *mp, sadb_ext_t *ext, uint_t serial,
2096     netstack_t *ns)
2097 {
2098 	sadb_address_t *addr = (sadb_address_t *)ext;
2099 	struct sockaddr_in *sin;
2100 	struct sockaddr_in6 *sin6;
2101 	ire_t *ire;
2102 	int diagnostic, type;
2103 	boolean_t normalized = B_FALSE;
2104 
2105 	ASSERT(ext != NULL);
2106 	ASSERT((ext->sadb_ext_type == SADB_EXT_ADDRESS_SRC) ||
2107 	    (ext->sadb_ext_type == SADB_EXT_ADDRESS_DST) ||
2108 	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_SRC) ||
2109 	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_DST) ||
2110 	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_NATT_LOC) ||
2111 	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_NATT_REM));
2112 
2113 	/* Assign both sockaddrs, the compiler will do the right thing. */
2114 	sin = (struct sockaddr_in *)(addr + 1);
2115 	sin6 = (struct sockaddr_in6 *)(addr + 1);
2116 
2117 	if (sin6->sin6_family == AF_INET6) {
2118 		if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
2119 			/*
2120 			 * Convert to an AF_INET sockaddr.  This means the
2121 			 * return messages will have the extra space, but have
2122 			 * AF_INET sockaddrs instead of AF_INET6.
2123 			 *
2124 			 * Yes, RFC 2367 isn't clear on what to do here w.r.t.
2125 			 * mapped addresses, but since AF_INET6 ::ffff:<v4> is
2126 			 * equal to AF_INET <v4>, it shouldnt be a huge
2127 			 * problem.
2128 			 */
2129 			sin->sin_family = AF_INET;
2130 			IN6_V4MAPPED_TO_INADDR(&sin6->sin6_addr,
2131 			    &sin->sin_addr);
2132 			bzero(&sin->sin_zero, sizeof (sin->sin_zero));
2133 			normalized = B_TRUE;
2134 		}
2135 	} else if (sin->sin_family != AF_INET) {
2136 		switch (ext->sadb_ext_type) {
2137 		case SADB_EXT_ADDRESS_SRC:
2138 			diagnostic = SADB_X_DIAGNOSTIC_BAD_SRC_AF;
2139 			break;
2140 		case SADB_EXT_ADDRESS_DST:
2141 			diagnostic = SADB_X_DIAGNOSTIC_BAD_DST_AF;
2142 			break;
2143 		case SADB_X_EXT_ADDRESS_INNER_SRC:
2144 			diagnostic = SADB_X_DIAGNOSTIC_BAD_PROXY_AF;
2145 			break;
2146 		case SADB_X_EXT_ADDRESS_INNER_DST:
2147 			diagnostic = SADB_X_DIAGNOSTIC_BAD_INNER_DST_AF;
2148 			break;
2149 		case SADB_X_EXT_ADDRESS_NATT_LOC:
2150 			diagnostic = SADB_X_DIAGNOSTIC_BAD_NATT_LOC_AF;
2151 			break;
2152 		case SADB_X_EXT_ADDRESS_NATT_REM:
2153 			diagnostic = SADB_X_DIAGNOSTIC_BAD_NATT_REM_AF;
2154 			break;
2155 			/* There is no default, see above ASSERT. */
2156 		}
2157 bail:
2158 		if (pfkey_q != NULL) {
2159 			sadb_pfkey_error(pfkey_q, mp, EINVAL, diagnostic,
2160 			    serial);
2161 		} else {
2162 			/*
2163 			 * Scribble in sadb_msg that we got passed in.
2164 			 * Overload "mp" to be an sadb_msg pointer.
2165 			 */
2166 			sadb_msg_t *samsg = (sadb_msg_t *)mp;
2167 
2168 			samsg->sadb_msg_errno = EINVAL;
2169 			samsg->sadb_x_msg_diagnostic = diagnostic;
2170 		}
2171 		return (KS_IN_ADDR_UNKNOWN);
2172 	}
2173 
2174 	if (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_SRC ||
2175 	    ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_DST) {
2176 		/*
2177 		 * We need only check for prefix issues.
2178 		 */
2179 
2180 		/* Set diagnostic now, in case we need it later. */
2181 		diagnostic =
2182 		    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_SRC) ?
2183 		    SADB_X_DIAGNOSTIC_PREFIX_INNER_SRC :
2184 		    SADB_X_DIAGNOSTIC_PREFIX_INNER_DST;
2185 
2186 		if (normalized)
2187 			addr->sadb_address_prefixlen -= 96;
2188 
2189 		/*
2190 		 * Verify and mask out inner-addresses based on prefix length.
2191 		 */
2192 		if (sin->sin_family == AF_INET) {
2193 			if (addr->sadb_address_prefixlen > 32)
2194 				goto bail;
2195 			sin->sin_addr.s_addr &=
2196 			    ip_plen_to_mask(addr->sadb_address_prefixlen);
2197 		} else {
2198 			in6_addr_t mask;
2199 
2200 			ASSERT(sin->sin_family == AF_INET6);
2201 			/*
2202 			 * ip_plen_to_mask_v6() returns NULL if the value in
2203 			 * question is out of range.
2204 			 */
2205 			if (ip_plen_to_mask_v6(addr->sadb_address_prefixlen,
2206 			    &mask) == NULL)
2207 				goto bail;
2208 			sin6->sin6_addr.s6_addr32[0] &= mask.s6_addr32[0];
2209 			sin6->sin6_addr.s6_addr32[1] &= mask.s6_addr32[1];
2210 			sin6->sin6_addr.s6_addr32[2] &= mask.s6_addr32[2];
2211 			sin6->sin6_addr.s6_addr32[3] &= mask.s6_addr32[3];
2212 		}
2213 
2214 		/* We don't care in these cases. */
2215 		return (KS_IN_ADDR_DONTCARE);
2216 	}
2217 
2218 	if (sin->sin_family == AF_INET6) {
2219 		/* Check the easy ones now. */
2220 		if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
2221 			return (KS_IN_ADDR_MBCAST);
2222 		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
2223 			return (KS_IN_ADDR_UNSPEC);
2224 		/*
2225 		 * At this point, we're a unicast IPv6 address.
2226 		 *
2227 		 * A ctable lookup for local is sufficient here.  If we're
2228 		 * local, return KS_IN_ADDR_ME, otherwise KS_IN_ADDR_NOTME.
2229 		 *
2230 		 * XXX Zones alert -> me/notme decision needs to be tempered
2231 		 * by what zone we're in when we go to zone-aware IPsec.
2232 		 */
2233 		ire = ire_ctable_lookup_v6(&sin6->sin6_addr, NULL,
2234 		    IRE_LOCAL, NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE,
2235 		    ns->netstack_ip);
2236 		if (ire != NULL) {
2237 			/* Hey hey, it's local. */
2238 			IRE_REFRELE(ire);
2239 			return (KS_IN_ADDR_ME);
2240 		}
2241 	} else {
2242 		ASSERT(sin->sin_family == AF_INET);
2243 		if (sin->sin_addr.s_addr == INADDR_ANY)
2244 			return (KS_IN_ADDR_UNSPEC);
2245 		if (CLASSD(sin->sin_addr.s_addr))
2246 			return (KS_IN_ADDR_MBCAST);
2247 		/*
2248 		 * At this point we're a unicast or broadcast IPv4 address.
2249 		 *
2250 		 * Lookup on the ctable for IRE_BROADCAST or IRE_LOCAL.
2251 		 * A NULL return value is NOTME, otherwise, look at the
2252 		 * returned ire for broadcast or not and return accordingly.
2253 		 *
2254 		 * XXX Zones alert -> me/notme decision needs to be tempered
2255 		 * by what zone we're in when we go to zone-aware IPsec.
2256 		 */
2257 		ire = ire_ctable_lookup(sin->sin_addr.s_addr, 0,
2258 		    IRE_LOCAL | IRE_BROADCAST, NULL, ALL_ZONES, NULL,
2259 		    MATCH_IRE_TYPE, ns->netstack_ip);
2260 		if (ire != NULL) {
2261 			/* Check for local or broadcast */
2262 			type = ire->ire_type;
2263 			IRE_REFRELE(ire);
2264 			ASSERT(type == IRE_LOCAL || type == IRE_BROADCAST);
2265 			return ((type == IRE_LOCAL) ? KS_IN_ADDR_ME :
2266 			    KS_IN_ADDR_MBCAST);
2267 		}
2268 	}
2269 
2270 	return (KS_IN_ADDR_NOTME);
2271 }
2272 
2273 /*
2274  * Address normalizations and reality checks for inbound PF_KEY messages.
2275  *
2276  * For the case of src == unspecified AF_INET6, and dst == AF_INET, convert
2277  * the source to AF_INET.  Do the same for the inner sources.
2278  */
2279 boolean_t
2280 sadb_addrfix(keysock_in_t *ksi, queue_t *pfkey_q, mblk_t *mp, netstack_t *ns)
2281 {
2282 	struct sockaddr_in *src, *isrc;
2283 	struct sockaddr_in6 *dst, *idst;
2284 	sadb_address_t *srcext, *dstext;
2285 	uint16_t sport;
2286 	sadb_ext_t **extv = ksi->ks_in_extv;
2287 	int rc;
2288 
2289 	if (extv[SADB_EXT_ADDRESS_SRC] != NULL) {
2290 		rc = sadb_addrcheck(pfkey_q, mp, extv[SADB_EXT_ADDRESS_SRC],
2291 		    ksi->ks_in_serial, ns);
2292 		if (rc == KS_IN_ADDR_UNKNOWN)
2293 			return (B_FALSE);
2294 		if (rc == KS_IN_ADDR_MBCAST) {
2295 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2296 			    SADB_X_DIAGNOSTIC_BAD_SRC, ksi->ks_in_serial);
2297 			return (B_FALSE);
2298 		}
2299 		ksi->ks_in_srctype = rc;
2300 	}
2301 
2302 	if (extv[SADB_EXT_ADDRESS_DST] != NULL) {
2303 		rc = sadb_addrcheck(pfkey_q, mp, extv[SADB_EXT_ADDRESS_DST],
2304 		    ksi->ks_in_serial, ns);
2305 		if (rc == KS_IN_ADDR_UNKNOWN)
2306 			return (B_FALSE);
2307 		if (rc == KS_IN_ADDR_UNSPEC) {
2308 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2309 			    SADB_X_DIAGNOSTIC_BAD_DST, ksi->ks_in_serial);
2310 			return (B_FALSE);
2311 		}
2312 		ksi->ks_in_dsttype = rc;
2313 	}
2314 
2315 	/*
2316 	 * NAT-Traversal addrs are simple enough to not require all of
2317 	 * the checks in sadb_addrcheck().  Just normalize or reject if not
2318 	 * AF_INET.
2319 	 */
2320 	if (extv[SADB_X_EXT_ADDRESS_NATT_LOC] != NULL) {
2321 		rc = sadb_addrcheck(pfkey_q, mp,
2322 		    extv[SADB_X_EXT_ADDRESS_NATT_LOC], ksi->ks_in_serial, ns);
2323 
2324 		/*
2325 		 * Local NAT-T addresses never use an IRE_LOCAL, so it should
2326 		 * always be NOTME, or UNSPEC (to handle both tunnel mode
2327 		 * AND local-port flexibility).
2328 		 */
2329 		if (rc != KS_IN_ADDR_NOTME && rc != KS_IN_ADDR_UNSPEC) {
2330 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2331 			    SADB_X_DIAGNOSTIC_MALFORMED_NATT_LOC,
2332 			    ksi->ks_in_serial);
2333 			return (B_FALSE);
2334 		}
2335 		src = (struct sockaddr_in *)
2336 		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_NATT_LOC]) + 1);
2337 		if (src->sin_family != AF_INET) {
2338 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2339 			    SADB_X_DIAGNOSTIC_BAD_NATT_LOC_AF,
2340 			    ksi->ks_in_serial);
2341 			return (B_FALSE);
2342 		}
2343 	}
2344 
2345 	if (extv[SADB_X_EXT_ADDRESS_NATT_REM] != NULL) {
2346 		rc = sadb_addrcheck(pfkey_q, mp,
2347 		    extv[SADB_X_EXT_ADDRESS_NATT_REM], ksi->ks_in_serial, ns);
2348 
2349 		/*
2350 		 * Remote NAT-T addresses never use an IRE_LOCAL, so it should
2351 		 * always be NOTME, or UNSPEC if it's a tunnel-mode SA.
2352 		 */
2353 		if (rc != KS_IN_ADDR_NOTME &&
2354 		    !(extv[SADB_X_EXT_ADDRESS_INNER_SRC] != NULL &&
2355 		    rc == KS_IN_ADDR_UNSPEC)) {
2356 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2357 			    SADB_X_DIAGNOSTIC_MALFORMED_NATT_REM,
2358 			    ksi->ks_in_serial);
2359 			return (B_FALSE);
2360 		}
2361 		src = (struct sockaddr_in *)
2362 		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_NATT_REM]) + 1);
2363 		if (src->sin_family != AF_INET) {
2364 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2365 			    SADB_X_DIAGNOSTIC_BAD_NATT_REM_AF,
2366 			    ksi->ks_in_serial);
2367 			return (B_FALSE);
2368 		}
2369 	}
2370 
2371 	if (extv[SADB_X_EXT_ADDRESS_INNER_SRC] != NULL) {
2372 		if (extv[SADB_X_EXT_ADDRESS_INNER_DST] == NULL) {
2373 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2374 			    SADB_X_DIAGNOSTIC_MISSING_INNER_DST,
2375 			    ksi->ks_in_serial);
2376 			return (B_FALSE);
2377 		}
2378 
2379 		if (sadb_addrcheck(pfkey_q, mp,
2380 		    extv[SADB_X_EXT_ADDRESS_INNER_DST], ksi->ks_in_serial, ns)
2381 		    == KS_IN_ADDR_UNKNOWN ||
2382 		    sadb_addrcheck(pfkey_q, mp,
2383 		    extv[SADB_X_EXT_ADDRESS_INNER_SRC], ksi->ks_in_serial, ns)
2384 		    == KS_IN_ADDR_UNKNOWN)
2385 			return (B_FALSE);
2386 
2387 		isrc = (struct sockaddr_in *)
2388 		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_SRC]) +
2389 		    1);
2390 		idst = (struct sockaddr_in6 *)
2391 		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_DST]) +
2392 		    1);
2393 		if (isrc->sin_family != idst->sin6_family) {
2394 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2395 			    SADB_X_DIAGNOSTIC_INNER_AF_MISMATCH,
2396 			    ksi->ks_in_serial);
2397 			return (B_FALSE);
2398 		}
2399 	} else if (extv[SADB_X_EXT_ADDRESS_INNER_DST] != NULL) {
2400 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2401 			    SADB_X_DIAGNOSTIC_MISSING_INNER_SRC,
2402 			    ksi->ks_in_serial);
2403 			return (B_FALSE);
2404 	} else {
2405 		isrc = NULL;	/* For inner/outer port check below. */
2406 	}
2407 
2408 	dstext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_DST];
2409 	srcext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_SRC];
2410 
2411 	if (dstext == NULL || srcext == NULL)
2412 		return (B_TRUE);
2413 
2414 	dst = (struct sockaddr_in6 *)(dstext + 1);
2415 	src = (struct sockaddr_in *)(srcext + 1);
2416 
2417 	if (isrc != NULL &&
2418 	    (isrc->sin_port != 0 || idst->sin6_port != 0) &&
2419 	    (src->sin_port != 0 || dst->sin6_port != 0)) {
2420 		/* Can't set inner and outer ports in one SA. */
2421 		sadb_pfkey_error(pfkey_q, mp, EINVAL,
2422 		    SADB_X_DIAGNOSTIC_DUAL_PORT_SETS,
2423 		    ksi->ks_in_serial);
2424 		return (B_FALSE);
2425 	}
2426 
2427 	if (dst->sin6_family == src->sin_family)
2428 		return (B_TRUE);
2429 
2430 	if (srcext->sadb_address_proto != dstext->sadb_address_proto) {
2431 		if (srcext->sadb_address_proto == 0) {
2432 			srcext->sadb_address_proto = dstext->sadb_address_proto;
2433 		} else if (dstext->sadb_address_proto == 0) {
2434 			dstext->sadb_address_proto = srcext->sadb_address_proto;
2435 		} else {
2436 			/* Inequal protocols, neither were 0.  Report error. */
2437 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2438 			    SADB_X_DIAGNOSTIC_PROTO_MISMATCH,
2439 			    ksi->ks_in_serial);
2440 			return (B_FALSE);
2441 		}
2442 	}
2443 
2444 	/*
2445 	 * With the exception of an unspec IPv6 source and an IPv4
2446 	 * destination, address families MUST me matched.
2447 	 */
2448 	if (src->sin_family == AF_INET ||
2449 	    ksi->ks_in_srctype != KS_IN_ADDR_UNSPEC) {
2450 		sadb_pfkey_error(pfkey_q, mp, EINVAL,
2451 		    SADB_X_DIAGNOSTIC_AF_MISMATCH, ksi->ks_in_serial);
2452 		return (B_FALSE);
2453 	}
2454 
2455 	/*
2456 	 * Convert "src" to AF_INET INADDR_ANY.  We rely on sin_port being
2457 	 * in the same place for sockaddr_in and sockaddr_in6.
2458 	 */
2459 	sport = src->sin_port;
2460 	bzero(src, sizeof (*src));
2461 	src->sin_family = AF_INET;
2462 	src->sin_port = sport;
2463 
2464 	return (B_TRUE);
2465 }
2466 
2467 /*
2468  * Set the results in "addrtype", given an IRE as requested by
2469  * sadb_addrcheck().
2470  */
2471 int
2472 sadb_addrset(ire_t *ire)
2473 {
2474 	if ((ire->ire_type & IRE_BROADCAST) ||
2475 	    (ire->ire_ipversion == IPV4_VERSION && CLASSD(ire->ire_addr)) ||
2476 	    (ire->ire_ipversion == IPV6_VERSION &&
2477 	    IN6_IS_ADDR_MULTICAST(&(ire->ire_addr_v6))))
2478 		return (KS_IN_ADDR_MBCAST);
2479 	if (ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK))
2480 		return (KS_IN_ADDR_ME);
2481 	return (KS_IN_ADDR_NOTME);
2482 }
2483 
2484 
2485 /*
2486  * Walker callback function to delete sa's based on src/dst address.
2487  * Assumes that we're called with *head locked, no other locks held;
2488  * Conveniently, and not coincidentally, this is both what sadb_walker
2489  * gives us and also what sadb_unlinkassoc expects.
2490  */
2491 
/*
 * Cookie passed through sadb_walker() to the purge/delete callbacks
 * (sadb_purge_cb() and sadb_delpair_state()).  It carries the match
 * criteria plus the state the callbacks accumulate.  Each callback reads
 * only some of the fields, and the code that sets up a walk fills in only
 * those.
 */
struct sadb_purge_state
{
	uint32_t *src;		/* Source address to match; NULL = any. */
	uint32_t *dst;		/* Destination address to match; NULL = any. */
	sa_family_t af;		/* Address family of src/dst. */
	boolean_t inbnd;	/* B_TRUE while walking the inbound fanout. */
	char *sidstr;		/* Source identity to match; NULL = any. */
	char *didstr;		/* Destination identity to match; NULL = any. */
	uint16_t sidtype;	/* Identity type that goes with sidstr. */
	uint16_t didtype;	/* Identity type that goes with didstr. */
	uint32_t kmproto;	/* KM protocol; > SADB_X_KMP_MAX = any. */
	uint8_t sadb_sa_state;	/* SA state sadb_delpair_state() selects. */
	mblk_t *mq;		/* Torch queue built by sadb_torch_assoc(). */
	sadb_t *sp;		/* SADB used to find inbound peer buckets. */
};
2507 
2508 static void
2509 sadb_purge_cb(isaf_t *head, ipsa_t *entry, void *cookie)
2510 {
2511 	struct sadb_purge_state *ps = (struct sadb_purge_state *)cookie;
2512 
2513 	ASSERT(MUTEX_HELD(&head->isaf_lock));
2514 
2515 	mutex_enter(&entry->ipsa_lock);
2516 
2517 	if ((entry->ipsa_state == IPSA_STATE_LARVAL) ||
2518 	    (ps->src != NULL &&
2519 	    !IPSA_ARE_ADDR_EQUAL(entry->ipsa_srcaddr, ps->src, ps->af)) ||
2520 	    (ps->dst != NULL &&
2521 	    !IPSA_ARE_ADDR_EQUAL(entry->ipsa_dstaddr, ps->dst, ps->af)) ||
2522 	    (ps->didstr != NULL && (entry->ipsa_dst_cid != NULL) &&
2523 	    !(ps->didtype == entry->ipsa_dst_cid->ipsid_type &&
2524 	    strcmp(ps->didstr, entry->ipsa_dst_cid->ipsid_cid) == 0)) ||
2525 	    (ps->sidstr != NULL && (entry->ipsa_src_cid != NULL) &&
2526 	    !(ps->sidtype == entry->ipsa_src_cid->ipsid_type &&
2527 	    strcmp(ps->sidstr, entry->ipsa_src_cid->ipsid_cid) == 0)) ||
2528 	    (ps->kmproto <= SADB_X_KMP_MAX && ps->kmproto != entry->ipsa_kmp)) {
2529 		mutex_exit(&entry->ipsa_lock);
2530 		return;
2531 	}
2532 
2533 	if (ps->inbnd) {
2534 		sadb_delete_cluster(entry);
2535 	}
2536 	entry->ipsa_state = IPSA_STATE_DEAD;
2537 	(void) sadb_torch_assoc(head, entry, ps->inbnd, &ps->mq);
2538 }
2539 
2540 /*
2541  * Common code to purge an SA with a matching src or dst address.
2542  * Don't kill larval SA's in such a purge.
2543  */
2544 int
2545 sadb_purge_sa(mblk_t *mp, keysock_in_t *ksi, sadb_t *sp, queue_t *pfkey_q,
2546     queue_t *ip_q)
2547 {
2548 	sadb_address_t *dstext =
2549 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
2550 	sadb_address_t *srcext =
2551 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
2552 	sadb_ident_t *dstid =
2553 	    (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_DST];
2554 	sadb_ident_t *srcid =
2555 	    (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_SRC];
2556 	sadb_x_kmc_t *kmc =
2557 	    (sadb_x_kmc_t *)ksi->ks_in_extv[SADB_X_EXT_KM_COOKIE];
2558 	struct sockaddr_in *src, *dst;
2559 	struct sockaddr_in6 *src6, *dst6;
2560 	struct sadb_purge_state ps;
2561 
2562 	/*
2563 	 * Don't worry about IPv6 v4-mapped addresses, sadb_addrcheck()
2564 	 * takes care of them.
2565 	 */
2566 
2567 	/* enforced by caller */
2568 	ASSERT((dstext != NULL) || (srcext != NULL));
2569 
2570 	ps.src = NULL;
2571 	ps.dst = NULL;
2572 #ifdef DEBUG
2573 	ps.af = (sa_family_t)-1;
2574 #endif
2575 	ps.mq = NULL;
2576 	ps.sidstr = NULL;
2577 	ps.didstr = NULL;
2578 	ps.kmproto = SADB_X_KMP_MAX + 1;
2579 
2580 	if (dstext != NULL) {
2581 		dst = (struct sockaddr_in *)(dstext + 1);
2582 		ps.af = dst->sin_family;
2583 		if (dst->sin_family == AF_INET6) {
2584 			dst6 = (struct sockaddr_in6 *)dst;
2585 			ps.dst = (uint32_t *)&dst6->sin6_addr;
2586 		} else {
2587 			ps.dst = (uint32_t *)&dst->sin_addr;
2588 		}
2589 	}
2590 
2591 	if (srcext != NULL) {
2592 		src = (struct sockaddr_in *)(srcext + 1);
2593 		ps.af = src->sin_family;
2594 		if (src->sin_family == AF_INET6) {
2595 			src6 = (struct sockaddr_in6 *)(srcext + 1);
2596 			ps.src = (uint32_t *)&src6->sin6_addr;
2597 		} else {
2598 			ps.src = (uint32_t *)&src->sin_addr;
2599 		}
2600 		ASSERT(dstext == NULL || src->sin_family == dst->sin_family);
2601 	}
2602 
2603 	ASSERT(ps.af != (sa_family_t)-1);
2604 
2605 	if (dstid != NULL) {
2606 		/*
2607 		 * NOTE:  May need to copy string in the future
2608 		 * if the inbound keysock message disappears for some strange
2609 		 * reason.
2610 		 */
2611 		ps.didstr = (char *)(dstid + 1);
2612 		ps.didtype = dstid->sadb_ident_type;
2613 	}
2614 
2615 	if (srcid != NULL) {
2616 		/*
2617 		 * NOTE:  May need to copy string in the future
2618 		 * if the inbound keysock message disappears for some strange
2619 		 * reason.
2620 		 */
2621 		ps.sidstr = (char *)(srcid + 1);
2622 		ps.sidtype = srcid->sadb_ident_type;
2623 	}
2624 
2625 	if (kmc != NULL)
2626 		ps.kmproto = kmc->sadb_x_kmc_proto;
2627 
2628 	/*
2629 	 * This is simple, crude, and effective.
2630 	 * Unimplemented optimizations (TBD):
2631 	 * - we can limit how many places we search based on where we
2632 	 * think the SA is filed.
2633 	 * - if we get a dst address, we can hash based on dst addr to find
2634 	 * the correct bucket in the outbound table.
2635 	 */
2636 	ps.inbnd = B_TRUE;
2637 	sadb_walker(sp->sdb_if, sp->sdb_hashsize, sadb_purge_cb, &ps);
2638 	ps.inbnd = B_FALSE;
2639 	sadb_walker(sp->sdb_of, sp->sdb_hashsize, sadb_purge_cb, &ps);
2640 
2641 	if (ps.mq != NULL)
2642 		sadb_drain_torchq(ip_q, ps.mq);
2643 
2644 	ASSERT(mp->b_cont != NULL);
2645 	sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)mp->b_cont->b_rptr, ksi,
2646 	    NULL);
2647 	return (0);
2648 }
2649 
2650 static void
2651 sadb_delpair_state(isaf_t *head, ipsa_t *entry, void *cookie)
2652 {
2653 	struct sadb_purge_state *ps = (struct sadb_purge_state *)cookie;
2654 	isaf_t  *inbound_bucket;
2655 	ipsa_t *peer_assoc;
2656 
2657 	ASSERT(MUTEX_HELD(&head->isaf_lock));
2658 
2659 	mutex_enter(&entry->ipsa_lock);
2660 
2661 	if ((entry->ipsa_state != ps->sadb_sa_state) ||
2662 	    ((ps->src != NULL) &&
2663 	    !IPSA_ARE_ADDR_EQUAL(entry->ipsa_srcaddr, ps->src, ps->af))) {
2664 		mutex_exit(&entry->ipsa_lock);
2665 		return;
2666 	}
2667 
2668 	/*
2669 	 * The isaf_t *, which is passed in , is always an outbound bucket,
2670 	 * and we are preserving the outbound-then-inbound hash-bucket lock
2671 	 * ordering. The sadb_walker() which triggers this function is called
2672 	 * only on the outbound fanout, and the corresponding inbound bucket
2673 	 * lock is safe to acquire here.
2674 	 */
2675 
2676 	if (entry->ipsa_haspeer) {
2677 		inbound_bucket = INBOUND_BUCKET(ps->sp, entry->ipsa_spi);
2678 		mutex_enter(&inbound_bucket->isaf_lock);
2679 		peer_assoc = ipsec_getassocbyspi(inbound_bucket,
2680 		    entry->ipsa_spi, entry->ipsa_srcaddr,
2681 		    entry->ipsa_dstaddr, entry->ipsa_addrfam);
2682 	} else {
2683 		inbound_bucket = INBOUND_BUCKET(ps->sp, entry->ipsa_otherspi);
2684 		mutex_enter(&inbound_bucket->isaf_lock);
2685 		peer_assoc = ipsec_getassocbyspi(inbound_bucket,
2686 		    entry->ipsa_otherspi, entry->ipsa_dstaddr,
2687 		    entry->ipsa_srcaddr, entry->ipsa_addrfam);
2688 	}
2689 
2690 	entry->ipsa_state = IPSA_STATE_DEAD;
2691 	(void) sadb_torch_assoc(head, entry, B_FALSE, &ps->mq);
2692 	if (peer_assoc != NULL) {
2693 		mutex_enter(&peer_assoc->ipsa_lock);
2694 		peer_assoc->ipsa_state = IPSA_STATE_DEAD;
2695 		(void) sadb_torch_assoc(inbound_bucket, peer_assoc,
2696 		    B_FALSE, &ps->mq);
2697 	}
2698 	mutex_exit(&inbound_bucket->isaf_lock);
2699 }
2700 
2701 /*
2702  * Common code to delete/get an SA.
2703  */
2704 int
2705 sadb_delget_sa(mblk_t *mp, keysock_in_t *ksi, sadbp_t *spp,
2706     int *diagnostic, queue_t *pfkey_q, uint8_t sadb_msg_type)
2707 {
2708 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
2709 	sadb_address_t *srcext =
2710 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
2711 	sadb_address_t *dstext =
2712 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
2713 	ipsa_t *echo_target = NULL;
2714 	ipsap_t *ipsapp;
2715 	mblk_t *torchq = NULL;
2716 	uint_t	error = 0;
2717 
2718 	if (assoc == NULL) {
2719 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
2720 		return (EINVAL);
2721 	}
2722 
2723 	if (sadb_msg_type == SADB_X_DELPAIR_STATE) {
2724 		struct sockaddr_in *src;
2725 		struct sockaddr_in6 *src6;
2726 		struct sadb_purge_state ps;
2727 
2728 		if (srcext == NULL) {
2729 			*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
2730 			return (EINVAL);
2731 		}
2732 		ps.src = NULL;
2733 		ps.mq = NULL;
2734 		src = (struct sockaddr_in *)(srcext + 1);
2735 		ps.af = src->sin_family;
2736 		if (src->sin_family == AF_INET6) {
2737 			src6 = (struct sockaddr_in6 *)(srcext + 1);
2738 			ps.src = (uint32_t *)&src6->sin6_addr;
2739 			ps.sp = &spp->s_v6;
2740 		} else {
2741 			ps.src = (uint32_t *)&src->sin_addr;
2742 			ps.sp = &spp->s_v4;
2743 		}
2744 		ps.inbnd = B_FALSE;
2745 		ps.sadb_sa_state = assoc->sadb_sa_state;
2746 		sadb_walker(ps.sp->sdb_of, ps.sp->sdb_hashsize,
2747 		    sadb_delpair_state, &ps);
2748 
2749 		if (ps.mq != NULL)
2750 			sadb_drain_torchq(pfkey_q, ps.mq);
2751 
2752 		ASSERT(mp->b_cont != NULL);
2753 		sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)mp->b_cont->b_rptr,
2754 		    ksi, NULL);
2755 		return (0);
2756 	}
2757 
2758 	if (dstext == NULL) {
2759 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
2760 		return (EINVAL);
2761 	}
2762 
2763 	ipsapp = get_ipsa_pair(assoc, srcext, dstext, spp);
2764 	if (ipsapp == NULL) {
2765 		*diagnostic = SADB_X_DIAGNOSTIC_SA_NOTFOUND;
2766 		return (ESRCH);
2767 	}
2768 
2769 	echo_target = ipsapp->ipsap_sa_ptr;
2770 	if (echo_target == NULL)
2771 		echo_target = ipsapp->ipsap_psa_ptr;
2772 
2773 	if (sadb_msg_type == SADB_DELETE || sadb_msg_type == SADB_X_DELPAIR) {
2774 		/*
2775 		 * Bucket locks will be required if SA is actually unlinked.
2776 		 * get_ipsa_pair() returns valid hash bucket pointers even
2777 		 * if it can't find a pair SA pointer. To prevent a potential
2778 		 * deadlock, always lock the outbound bucket before the inbound.
2779 		 */
2780 		if (ipsapp->in_inbound_table) {
2781 			mutex_enter(&ipsapp->ipsap_pbucket->isaf_lock);
2782 			mutex_enter(&ipsapp->ipsap_bucket->isaf_lock);
2783 		} else {
2784 			mutex_enter(&ipsapp->ipsap_bucket->isaf_lock);
2785 			mutex_enter(&ipsapp->ipsap_pbucket->isaf_lock);
2786 		}
2787 
2788 		if (ipsapp->ipsap_sa_ptr != NULL) {
2789 			mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
2790 			if (ipsapp->ipsap_sa_ptr->ipsa_flags & IPSA_F_INBOUND) {
2791 				sadb_delete_cluster(ipsapp->ipsap_sa_ptr);
2792 			}
2793 			ipsapp->ipsap_sa_ptr->ipsa_state = IPSA_STATE_DEAD;
2794 			(void) sadb_torch_assoc(ipsapp->ipsap_bucket,
2795 			    ipsapp->ipsap_sa_ptr, B_FALSE, &torchq);
2796 			/*
2797 			 * sadb_torch_assoc() releases the ipsa_lock
2798 			 * and calls sadb_unlinkassoc() which does a
2799 			 * IPSA_REFRELE.
2800 			 */
2801 		}
2802 		if (ipsapp->ipsap_psa_ptr != NULL) {
2803 			mutex_enter(&ipsapp->ipsap_psa_ptr->ipsa_lock);
2804 			if (sadb_msg_type == SADB_X_DELPAIR ||
2805 			    ipsapp->ipsap_psa_ptr->ipsa_haspeer) {
2806 				if (ipsapp->ipsap_psa_ptr->ipsa_flags &
2807 				    IPSA_F_INBOUND) {
2808 					sadb_delete_cluster(
2809 					    ipsapp->ipsap_psa_ptr);
2810 				}
2811 				ipsapp->ipsap_psa_ptr->ipsa_state =
2812 				    IPSA_STATE_DEAD;
2813 				(void) sadb_torch_assoc(ipsapp->ipsap_pbucket,
2814 				    ipsapp->ipsap_psa_ptr, B_FALSE, &torchq);
2815 			} else {
2816 				/*
2817 				 * Only half of the "pair" has been deleted.
2818 				 * Update the remaining SA and remove references
2819 				 * to its pair SA, which is now gone.
2820 				 */
2821 				ipsapp->ipsap_psa_ptr->ipsa_otherspi = 0;
2822 				ipsapp->ipsap_psa_ptr->ipsa_flags &=
2823 				    ~IPSA_F_PAIRED;
2824 				mutex_exit(&ipsapp->ipsap_psa_ptr->ipsa_lock);
2825 			}
2826 		} else if (sadb_msg_type == SADB_X_DELPAIR) {
2827 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_SA_NOTFOUND;
2828 			error = ESRCH;
2829 		}
2830 		mutex_exit(&ipsapp->ipsap_bucket->isaf_lock);
2831 		mutex_exit(&ipsapp->ipsap_pbucket->isaf_lock);
2832 	}
2833 
2834 	if (torchq != NULL)
2835 		sadb_drain_torchq(spp->s_ip_q, torchq);
2836 
2837 	ASSERT(mp->b_cont != NULL);
2838 
2839 	if (error == 0)
2840 		sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)
2841 		    mp->b_cont->b_rptr, ksi, echo_target);
2842 
2843 	destroy_ipsa_pair(ipsapp);
2844 
2845 	return (error);
2846 }
2847 
2848 /*
2849  * This function takes a sadb_sa_t and finds the ipsa_t structure
2850  * and the isaf_t (hash bucket) that its stored under. If the security
2851  * association has a peer, the ipsa_t structure and bucket for that security
2852  * association are also searched for. The "pair" of ipsa_t's and isaf_t's
2853  * are returned as a ipsap_t.
2854  *
2855  * The hash buckets are returned for convenience, if the calling function
2856  * needs to use the hash bucket locks, say to remove the SA's, it should
2857  * take care to observe the convention of locking outbound bucket then
2858  * inbound bucket. The flag in_inbound_table provides direction.
2859  *
2860  * Note that a "pair" is defined as one (but not both) of the following:
2861  *
2862  * A security association which has a soft reference to another security
2863  * association via its SPI.
2864  *
2865  * A security association that is not obviously "inbound" or "outbound" so
2866  * it appears in both hash tables, the "peer" being the same security
2867  * association in the other hash table.
2868  *
2869  * This function will return NULL if the ipsa_t can't be found in the
2870  * inbound or outbound  hash tables (not found). If only one ipsa_t is
2871  * found, the pair ipsa_t will be NULL. Both isaf_t values are valid
2872  * provided at least one ipsa_t is found.
2873  */
2874 ipsap_t *
2875 get_ipsa_pair(sadb_sa_t *assoc, sadb_address_t *srcext, sadb_address_t *dstext,
2876     sadbp_t *spp)
2877 {
2878 	struct sockaddr_in *src, *dst;
2879 	struct sockaddr_in6 *src6, *dst6;
2880 	sadb_t *sp;
2881 	uint32_t *srcaddr, *dstaddr;
2882 	isaf_t *outbound_bucket, *inbound_bucket;
2883 	ipsap_t *ipsapp;
2884 	sa_family_t af;
2885 
2886 	uint32_t pair_srcaddr[IPSA_MAX_ADDRLEN];
2887 	uint32_t pair_dstaddr[IPSA_MAX_ADDRLEN];
2888 	uint32_t pair_spi;
2889 
2890 	ipsapp = kmem_zalloc(sizeof (*ipsapp), KM_NOSLEEP);
2891 	if (ipsapp == NULL)
2892 		return (NULL);
2893 
2894 	ipsapp->in_inbound_table = B_FALSE;
2895 
2896 	/*
2897 	 * Don't worry about IPv6 v4-mapped addresses, sadb_addrcheck()
2898 	 * takes care of them.
2899 	 */
2900 
2901 	dst = (struct sockaddr_in *)(dstext + 1);
2902 	af = dst->sin_family;
2903 	if (af == AF_INET6) {
2904 		sp = &spp->s_v6;
2905 		dst6 = (struct sockaddr_in6 *)dst;
2906 		dstaddr = (uint32_t *)&dst6->sin6_addr;
2907 		if (srcext != NULL) {
2908 			src6 = (struct sockaddr_in6 *)(srcext + 1);
2909 			srcaddr = (uint32_t *)&src6->sin6_addr;
2910 			ASSERT(src6->sin6_family == af);
2911 			ASSERT(src6->sin6_family == AF_INET6);
2912 		} else {
2913 			srcaddr = ALL_ZEROES_PTR;
2914 		}
2915 		outbound_bucket = OUTBOUND_BUCKET_V6(sp,
2916 		    *(uint32_t *)dstaddr);
2917 	} else {
2918 		sp = &spp->s_v4;
2919 		dstaddr = (uint32_t *)&dst->sin_addr;
2920 		if (srcext != NULL) {
2921 			src = (struct sockaddr_in *)(srcext + 1);
2922 			srcaddr = (uint32_t *)&src->sin_addr;
2923 			ASSERT(src->sin_family == af);
2924 			ASSERT(src->sin_family == AF_INET);
2925 		} else {
2926 			srcaddr = ALL_ZEROES_PTR;
2927 		}
2928 		outbound_bucket = OUTBOUND_BUCKET_V4(sp,
2929 		    *(uint32_t *)dstaddr);
2930 	}
2931 
2932 	inbound_bucket = INBOUND_BUCKET(sp, assoc->sadb_sa_spi);
2933 
2934 	/* Lock down both buckets. */
2935 	mutex_enter(&outbound_bucket->isaf_lock);
2936 	mutex_enter(&inbound_bucket->isaf_lock);
2937 
2938 	if (assoc->sadb_sa_flags & IPSA_F_INBOUND) {
2939 		ipsapp->ipsap_sa_ptr = ipsec_getassocbyspi(inbound_bucket,
2940 		    assoc->sadb_sa_spi, srcaddr, dstaddr, af);
2941 		if (ipsapp->ipsap_sa_ptr != NULL) {
2942 			ipsapp->ipsap_bucket = inbound_bucket;
2943 			ipsapp->ipsap_pbucket = outbound_bucket;
2944 			ipsapp->in_inbound_table = B_TRUE;
2945 		} else {
2946 			ipsapp->ipsap_sa_ptr =
2947 			    ipsec_getassocbyspi(outbound_bucket,
2948 			    assoc->sadb_sa_spi, srcaddr, dstaddr, af);
2949 			ipsapp->ipsap_bucket = outbound_bucket;
2950 			ipsapp->ipsap_pbucket = inbound_bucket;
2951 		}
2952 	} else {
2953 		/* IPSA_F_OUTBOUND is set *or* no directions flags set. */
2954 		ipsapp->ipsap_sa_ptr =
2955 		    ipsec_getassocbyspi(outbound_bucket,
2956 		    assoc->sadb_sa_spi, srcaddr, dstaddr, af);
2957 		if (ipsapp->ipsap_sa_ptr != NULL) {
2958 			ipsapp->ipsap_bucket = outbound_bucket;
2959 			ipsapp->ipsap_pbucket = inbound_bucket;
2960 		} else {
2961 			ipsapp->ipsap_sa_ptr =
2962 			    ipsec_getassocbyspi(inbound_bucket,
2963 			    assoc->sadb_sa_spi, srcaddr, dstaddr, af);
2964 			ipsapp->ipsap_bucket = inbound_bucket;
2965 			ipsapp->ipsap_pbucket = outbound_bucket;
2966 			if (ipsapp->ipsap_sa_ptr != NULL)
2967 				ipsapp->in_inbound_table = B_TRUE;
2968 		}
2969 	}
2970 
2971 	if (ipsapp->ipsap_sa_ptr == NULL) {
2972 		mutex_exit(&outbound_bucket->isaf_lock);
2973 		mutex_exit(&inbound_bucket->isaf_lock);
2974 		kmem_free(ipsapp, sizeof (*ipsapp));
2975 		return (NULL);
2976 	}
2977 
2978 	if ((ipsapp->ipsap_sa_ptr->ipsa_state == IPSA_STATE_LARVAL) &&
2979 	    ipsapp->in_inbound_table) {
2980 		mutex_exit(&outbound_bucket->isaf_lock);
2981 		mutex_exit(&inbound_bucket->isaf_lock);
2982 		return (ipsapp);
2983 	}
2984 
2985 	mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
2986 	if (ipsapp->ipsap_sa_ptr->ipsa_haspeer) {
2987 		/*
2988 		 * haspeer implies no sa_pairing, look for same spi
2989 		 * in other hashtable.
2990 		 */
2991 		ipsapp->ipsap_psa_ptr =
2992 		    ipsec_getassocbyspi(ipsapp->ipsap_pbucket,
2993 		    assoc->sadb_sa_spi, srcaddr, dstaddr, af);
2994 		mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
2995 		mutex_exit(&outbound_bucket->isaf_lock);
2996 		mutex_exit(&inbound_bucket->isaf_lock);
2997 		return (ipsapp);
2998 	}
2999 	pair_spi = ipsapp->ipsap_sa_ptr->ipsa_otherspi;
3000 	IPSA_COPY_ADDR(&pair_srcaddr,
3001 	    ipsapp->ipsap_sa_ptr->ipsa_srcaddr, af);
3002 	IPSA_COPY_ADDR(&pair_dstaddr,
3003 	    ipsapp->ipsap_sa_ptr->ipsa_dstaddr, af);
3004 	mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
3005 	mutex_exit(&outbound_bucket->isaf_lock);
3006 	mutex_exit(&inbound_bucket->isaf_lock);
3007 
3008 	if (pair_spi == 0) {
3009 		ASSERT(ipsapp->ipsap_bucket != NULL);
3010 		ASSERT(ipsapp->ipsap_pbucket != NULL);
3011 		return (ipsapp);
3012 	}
3013 
3014 	/* found sa in outbound sadb, peer should be inbound */
3015 
3016 	if (ipsapp->in_inbound_table) {
3017 		/* Found SA in inbound table, pair will be in outbound. */
3018 		if (af == AF_INET6) {
3019 			ipsapp->ipsap_pbucket = OUTBOUND_BUCKET_V6(sp,
3020 			    *(uint32_t *)pair_srcaddr);
3021 		} else {
3022 			ipsapp->ipsap_pbucket = OUTBOUND_BUCKET_V4(sp,
3023 			    *(uint32_t *)pair_srcaddr);
3024 		}
3025 	} else {
3026 		ipsapp->ipsap_pbucket = INBOUND_BUCKET(sp, pair_spi);
3027 	}
3028 	mutex_enter(&ipsapp->ipsap_pbucket->isaf_lock);
3029 	ipsapp->ipsap_psa_ptr = ipsec_getassocbyspi(ipsapp->ipsap_pbucket,
3030 	    pair_spi, pair_dstaddr, pair_srcaddr, af);
3031 	mutex_exit(&ipsapp->ipsap_pbucket->isaf_lock);
3032 	ASSERT(ipsapp->ipsap_bucket != NULL);
3033 	ASSERT(ipsapp->ipsap_pbucket != NULL);
3034 	return (ipsapp);
3035 }
3036 
3037 /*
3038  * Initialize the mechanism parameters associated with an SA.
3039  * These parameters can be shared by multiple packets, which saves
3040  * us from the overhead of consulting the algorithm table for
3041  * each packet.
3042  */
3043 static void
3044 sadb_init_alginfo(ipsa_t *sa)
3045 {
3046 	ipsec_alginfo_t *alg;
3047 	ipsec_stack_t	*ipss = sa->ipsa_netstack->netstack_ipsec;
3048 
3049 	mutex_enter(&ipss->ipsec_alg_lock);
3050 
3051 	if (sa->ipsa_encrkey != NULL) {
3052 		alg = ipss->ipsec_alglists[IPSEC_ALG_ENCR][sa->ipsa_encr_alg];
3053 		if (alg != NULL && ALG_VALID(alg)) {
3054 			sa->ipsa_emech.cm_type = alg->alg_mech_type;
3055 			sa->ipsa_emech.cm_param = NULL;
3056 			sa->ipsa_emech.cm_param_len = 0;
3057 			sa->ipsa_iv_len = alg->alg_datalen;
3058 		} else
3059 			sa->ipsa_emech.cm_type = CRYPTO_MECHANISM_INVALID;
3060 	}
3061 
3062 	if (sa->ipsa_authkey != NULL) {
3063 		alg = ipss->ipsec_alglists[IPSEC_ALG_AUTH][sa->ipsa_auth_alg];
3064 		if (alg != NULL && ALG_VALID(alg)) {
3065 			sa->ipsa_amech.cm_type = alg->alg_mech_type;
3066 			sa->ipsa_amech.cm_param = (char *)&sa->ipsa_mac_len;
3067 			sa->ipsa_amech.cm_param_len = sizeof (size_t);
3068 			sa->ipsa_mac_len = (size_t)alg->alg_datalen;
3069 		} else
3070 			sa->ipsa_amech.cm_type = CRYPTO_MECHANISM_INVALID;
3071 	}
3072 
3073 	mutex_exit(&ipss->ipsec_alg_lock);
3074 }
3075 
3076 /*
3077  * Perform NAT-traversal cached checksum offset calculations here.
3078  */
3079 static void
3080 sadb_nat_calculations(ipsa_t *newbie, sadb_address_t *natt_loc_ext,
3081     sadb_address_t *natt_rem_ext, uint32_t *src_addr_ptr,
3082     uint32_t *dst_addr_ptr)
3083 {
3084 	struct sockaddr_in *natt_loc, *natt_rem;
3085 	uint32_t *natt_loc_ptr = NULL, *natt_rem_ptr = NULL;
3086 	uint32_t running_sum = 0;
3087 
3088 #define	DOWN_SUM(x) (x) = ((x) & 0xFFFF) +	 ((x) >> 16)
3089 
3090 	if (natt_rem_ext != NULL) {
3091 		uint32_t l_src;
3092 		uint32_t l_rem;
3093 
3094 		natt_rem = (struct sockaddr_in *)(natt_rem_ext + 1);
3095 
3096 		/* Ensured by sadb_addrfix(). */
3097 		ASSERT(natt_rem->sin_family == AF_INET);
3098 
3099 		natt_rem_ptr = (uint32_t *)(&natt_rem->sin_addr);
3100 		newbie->ipsa_remote_nat_port = natt_rem->sin_port;
3101 		l_src = *src_addr_ptr;
3102 		l_rem = *natt_rem_ptr;
3103 
3104 		/* Instead of IPSA_COPY_ADDR(), just copy first 32 bits. */
3105 		newbie->ipsa_natt_addr_rem = *natt_rem_ptr;
3106 
3107 		l_src = ntohl(l_src);
3108 		DOWN_SUM(l_src);
3109 		DOWN_SUM(l_src);
3110 		l_rem = ntohl(l_rem);
3111 		DOWN_SUM(l_rem);
3112 		DOWN_SUM(l_rem);
3113 
3114 		/*
3115 		 * We're 1's complement for checksums, so check for wraparound
3116 		 * here.
3117 		 */
3118 		if (l_rem > l_src)
3119 			l_src--;
3120 
3121 		running_sum += l_src - l_rem;
3122 
3123 		DOWN_SUM(running_sum);
3124 		DOWN_SUM(running_sum);
3125 	}
3126 
3127 	if (natt_loc_ext != NULL) {
3128 		natt_loc = (struct sockaddr_in *)(natt_loc_ext + 1);
3129 
3130 		/* Ensured by sadb_addrfix(). */
3131 		ASSERT(natt_loc->sin_family == AF_INET);
3132 
3133 		natt_loc_ptr = (uint32_t *)(&natt_loc->sin_addr);
3134 		newbie->ipsa_local_nat_port = natt_loc->sin_port;
3135 
3136 		/* Instead of IPSA_COPY_ADDR(), just copy first 32 bits. */
3137 		newbie->ipsa_natt_addr_loc = *natt_loc_ptr;
3138 
3139 		/*
3140 		 * NAT-T port agility means we may have natt_loc_ext, but
3141 		 * only for a local-port change.
3142 		 */
3143 		if (natt_loc->sin_addr.s_addr != INADDR_ANY) {
3144 			uint32_t l_dst = ntohl(*dst_addr_ptr);
3145 			uint32_t l_loc = ntohl(*natt_loc_ptr);
3146 
3147 			DOWN_SUM(l_loc);
3148 			DOWN_SUM(l_loc);
3149 			DOWN_SUM(l_dst);
3150 			DOWN_SUM(l_dst);
3151 
3152 			/*
3153 			 * We're 1's complement for checksums, so check for
3154 			 * wraparound here.
3155 			 */
3156 			if (l_loc > l_dst)
3157 				l_dst--;
3158 
3159 			running_sum += l_dst - l_loc;
3160 			DOWN_SUM(running_sum);
3161 			DOWN_SUM(running_sum);
3162 		}
3163 	}
3164 
3165 	newbie->ipsa_inbound_cksum = running_sum;
3166 #undef DOWN_SUM
3167 }
3168 
3169 /*
3170  * This function is called from consumers that need to insert a fully-grown
3171  * security association into its tables.  This function takes into account that
3172  * SAs can be "inbound", "outbound", or "both".	 The "primary" and "secondary"
3173  * hash bucket parameters are set in order of what the SA will be most of the
3174  * time.  (For example, an SA with an unspecified source, and a multicast
3175  * destination will primarily be an outbound SA.  OTOH, if that destination
3176  * is unicast for this node, then the SA will primarily be inbound.)
3177  *
3178  * It takes a lot of parameters because even if clone is B_FALSE, this needs
3179  * to check both buckets for purposes of collision.
3180  *
3181  * Return 0 upon success.  Return various errnos (ENOMEM, EEXIST) for
3182  * various error conditions.  We may need to set samsg->sadb_x_msg_diagnostic
3183  * with additional diagnostic information because there is at least one EINVAL
3184  * case here.
3185  */
3186 int
3187 sadb_common_add(queue_t *ip_q, queue_t *pfkey_q, mblk_t *mp, sadb_msg_t *samsg,
3188     keysock_in_t *ksi, isaf_t *primary, isaf_t *secondary,
3189     ipsa_t *newbie, boolean_t clone, boolean_t is_inbound, int *diagnostic,
3190     netstack_t *ns, sadbp_t *spp)
3191 {
3192 	ipsa_t *newbie_clone = NULL, *scratch;
3193 	ipsap_t *ipsapp = NULL;
3194 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
3195 	sadb_address_t *srcext =
3196 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
3197 	sadb_address_t *dstext =
3198 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
3199 	sadb_address_t *isrcext =
3200 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_SRC];
3201 	sadb_address_t *idstext =
3202 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_DST];
3203 	sadb_x_kmc_t *kmcext =
3204 	    (sadb_x_kmc_t *)ksi->ks_in_extv[SADB_X_EXT_KM_COOKIE];
3205 	sadb_key_t *akey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_AUTH];
3206 	sadb_key_t *ekey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT];
3207 	sadb_x_pair_t *pair_ext =
3208 	    (sadb_x_pair_t *)ksi->ks_in_extv[SADB_X_EXT_PAIR];
3209 	sadb_x_replay_ctr_t *replayext =
3210 	    (sadb_x_replay_ctr_t *)ksi->ks_in_extv[SADB_X_EXT_REPLAY_VALUE];
3211 	uint8_t protocol =
3212 	    (samsg->sadb_msg_satype == SADB_SATYPE_AH) ? IPPROTO_AH:IPPROTO_ESP;
3213 #if 0
3214 	/*
3215 	 * XXXMLS - When Trusted Solaris or Multi-Level Secure functionality
3216 	 * comes to ON, examine these if 0'ed fragments.  Look for XXXMLS.
3217 	 */
3218 	sadb_sens_t *sens = (sadb_sens_t *);
3219 #endif
3220 	struct sockaddr_in *src, *dst, *isrc, *idst;
3221 	struct sockaddr_in6 *src6, *dst6, *isrc6, *idst6;
3222 	sadb_lifetime_t *soft =
3223 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_SOFT];
3224 	sadb_lifetime_t *hard =
3225 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_HARD];
3226 	sadb_lifetime_t	*idle =
3227 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_X_EXT_LIFETIME_IDLE];
3228 	sa_family_t af;
3229 	int error = 0;
3230 	boolean_t isupdate = (newbie != NULL);
3231 	uint32_t *src_addr_ptr, *dst_addr_ptr, *isrc_addr_ptr, *idst_addr_ptr;
3232 	mblk_t *ctl_mp = NULL;
3233 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
3234 	int		rcode;
3235 
3236 	if (srcext == NULL) {
3237 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
3238 		return (EINVAL);
3239 	}
3240 	if (dstext == NULL) {
3241 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
3242 		return (EINVAL);
3243 	}
3244 	if (assoc == NULL) {
3245 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
3246 		return (EINVAL);
3247 	}
3248 
3249 	src = (struct sockaddr_in *)(srcext + 1);
3250 	src6 = (struct sockaddr_in6 *)(srcext + 1);
3251 	dst = (struct sockaddr_in *)(dstext + 1);
3252 	dst6 = (struct sockaddr_in6 *)(dstext + 1);
3253 	if (isrcext != NULL) {
3254 		isrc = (struct sockaddr_in *)(isrcext + 1);
3255 		isrc6 = (struct sockaddr_in6 *)(isrcext + 1);
3256 		ASSERT(idstext != NULL);
3257 		idst = (struct sockaddr_in *)(idstext + 1);
3258 		idst6 = (struct sockaddr_in6 *)(idstext + 1);
3259 	} else {
3260 		isrc = NULL;
3261 		isrc6 = NULL;
3262 	}
3263 
3264 	af = src->sin_family;
3265 
3266 	if (af == AF_INET) {
3267 		src_addr_ptr = (uint32_t *)&src->sin_addr;
3268 		dst_addr_ptr = (uint32_t *)&dst->sin_addr;
3269 	} else {
3270 		ASSERT(af == AF_INET6);
3271 		src_addr_ptr = (uint32_t *)&src6->sin6_addr;
3272 		dst_addr_ptr = (uint32_t *)&dst6->sin6_addr;
3273 	}
3274 
3275 	if (!isupdate && (clone == B_TRUE || is_inbound == B_TRUE) &&
3276 	    cl_inet_checkspi &&
3277 	    (assoc->sadb_sa_state != SADB_X_SASTATE_ACTIVE_ELSEWHERE)) {
3278 		rcode = cl_inet_checkspi(ns->netstack_stackid, protocol,
3279 		    assoc->sadb_sa_spi, NULL);
3280 		if (rcode == -1) {
3281 			return (EEXIST);
3282 		}
3283 	}
3284 
3285 	/*
3286 	 * Check to see if the new SA will be cloned AND paired. The
3287 	 * reason a SA will be cloned is the source or destination addresses
3288 	 * are not specific enough to determine if the SA goes in the outbound
3289 	 * or the inbound hash table, so its cloned and put in both. If
3290 	 * the SA is paired, it's soft linked to another SA for the other
3291 	 * direction. Keeping track and looking up SA's that are direction
3292 	 * unspecific and linked is too hard.
3293 	 */
3294 	if (clone && (pair_ext != NULL)) {
3295 		*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
3296 		return (EINVAL);
3297 	}
3298 
3299 	if (!isupdate) {
3300 		newbie = sadb_makelarvalassoc(assoc->sadb_sa_spi,
3301 		    src_addr_ptr, dst_addr_ptr, af, ns);
3302 		if (newbie == NULL)
3303 			return (ENOMEM);
3304 	}
3305 
3306 	mutex_enter(&newbie->ipsa_lock);
3307 
3308 	if (isrc != NULL) {
3309 		if (isrc->sin_family == AF_INET) {
3310 			if (srcext->sadb_address_proto != IPPROTO_ENCAP) {
3311 				if (srcext->sadb_address_proto != 0) {
3312 					/*
3313 					 * Mismatched outer-packet protocol
3314 					 * and inner-packet address family.
3315 					 */
3316 					mutex_exit(&newbie->ipsa_lock);
3317 					error = EPROTOTYPE;
3318 					*diagnostic =
3319 					    SADB_X_DIAGNOSTIC_INNER_AF_MISMATCH;
3320 					goto error;
3321 				} else {
3322 					/* Fill in with explicit protocol. */
3323 					srcext->sadb_address_proto =
3324 					    IPPROTO_ENCAP;
3325 					dstext->sadb_address_proto =
3326 					    IPPROTO_ENCAP;
3327 				}
3328 			}
3329 			isrc_addr_ptr = (uint32_t *)&isrc->sin_addr;
3330 			idst_addr_ptr = (uint32_t *)&idst->sin_addr;
3331 		} else {
3332 			ASSERT(isrc->sin_family == AF_INET6);
3333 			if (srcext->sadb_address_proto != IPPROTO_IPV6) {
3334 				if (srcext->sadb_address_proto != 0) {
3335 					/*
3336 					 * Mismatched outer-packet protocol
3337 					 * and inner-packet address family.
3338 					 */
3339 					mutex_exit(&newbie->ipsa_lock);
3340 					error = EPROTOTYPE;
3341 					*diagnostic =
3342 					    SADB_X_DIAGNOSTIC_INNER_AF_MISMATCH;
3343 					goto error;
3344 				} else {
3345 					/* Fill in with explicit protocol. */
3346 					srcext->sadb_address_proto =
3347 					    IPPROTO_IPV6;
3348 					dstext->sadb_address_proto =
3349 					    IPPROTO_IPV6;
3350 				}
3351 			}
3352 			isrc_addr_ptr = (uint32_t *)&isrc6->sin6_addr;
3353 			idst_addr_ptr = (uint32_t *)&idst6->sin6_addr;
3354 		}
3355 		newbie->ipsa_innerfam = isrc->sin_family;
3356 
3357 		IPSA_COPY_ADDR(newbie->ipsa_innersrc, isrc_addr_ptr,
3358 		    newbie->ipsa_innerfam);
3359 		IPSA_COPY_ADDR(newbie->ipsa_innerdst, idst_addr_ptr,
3360 		    newbie->ipsa_innerfam);
3361 		newbie->ipsa_innersrcpfx = isrcext->sadb_address_prefixlen;
3362 		newbie->ipsa_innerdstpfx = idstext->sadb_address_prefixlen;
3363 
3364 		/* Unique value uses inner-ports for Tunnel Mode... */
3365 		newbie->ipsa_unique_id = SA_UNIQUE_ID(isrc->sin_port,
3366 		    idst->sin_port, dstext->sadb_address_proto,
3367 		    idstext->sadb_address_proto);
3368 		newbie->ipsa_unique_mask = SA_UNIQUE_MASK(isrc->sin_port,
3369 		    idst->sin_port, dstext->sadb_address_proto,
3370 		    idstext->sadb_address_proto);
3371 	} else {
3372 		/* ... and outer-ports for Transport Mode. */
3373 		newbie->ipsa_unique_id = SA_UNIQUE_ID(src->sin_port,
3374 		    dst->sin_port, dstext->sadb_address_proto, 0);
3375 		newbie->ipsa_unique_mask = SA_UNIQUE_MASK(src->sin_port,
3376 		    dst->sin_port, dstext->sadb_address_proto, 0);
3377 	}
3378 	if (newbie->ipsa_unique_mask != (uint64_t)0)
3379 		newbie->ipsa_flags |= IPSA_F_UNIQUE;
3380 
3381 	sadb_nat_calculations(newbie,
3382 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_LOC],
3383 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_REM],
3384 	    src_addr_ptr, dst_addr_ptr);
3385 
3386 	newbie->ipsa_type = samsg->sadb_msg_satype;
3387 
3388 	ASSERT((assoc->sadb_sa_state == SADB_SASTATE_MATURE) ||
3389 	    (assoc->sadb_sa_state == SADB_X_SASTATE_ACTIVE_ELSEWHERE));
3390 	newbie->ipsa_auth_alg = assoc->sadb_sa_auth;
3391 	newbie->ipsa_encr_alg = assoc->sadb_sa_encrypt;
3392 
3393 	newbie->ipsa_flags |= assoc->sadb_sa_flags;
3394 	if (newbie->ipsa_flags & SADB_X_SAFLAGS_NATT_LOC &&
3395 	    ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_LOC] == NULL) {
3396 		mutex_exit(&newbie->ipsa_lock);
3397 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_NATT_LOC;
3398 		error = EINVAL;
3399 		goto error;
3400 	}
3401 	if (newbie->ipsa_flags & SADB_X_SAFLAGS_NATT_REM &&
3402 	    ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_REM] == NULL) {
3403 		mutex_exit(&newbie->ipsa_lock);
3404 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_NATT_REM;
3405 		error = EINVAL;
3406 		goto error;
3407 	}
3408 	if (newbie->ipsa_flags & SADB_X_SAFLAGS_TUNNEL &&
3409 	    ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_SRC] == NULL) {
3410 		mutex_exit(&newbie->ipsa_lock);
3411 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_SRC;
3412 		error = EINVAL;
3413 		goto error;
3414 	}
3415 	/*
3416 	 * If unspecified source address, force replay_wsize to 0.
3417 	 * This is because an SA that has multiple sources of secure
3418 	 * traffic cannot enforce a replay counter w/o synchronizing the
3419 	 * senders.
3420 	 */
3421 	if (ksi->ks_in_srctype != KS_IN_ADDR_UNSPEC)
3422 		newbie->ipsa_replay_wsize = assoc->sadb_sa_replay;
3423 	else
3424 		newbie->ipsa_replay_wsize = 0;
3425 
3426 	newbie->ipsa_addtime = gethrestime_sec();
3427 
3428 	if (kmcext != NULL) {
3429 		newbie->ipsa_kmp = kmcext->sadb_x_kmc_proto;
3430 		newbie->ipsa_kmc = kmcext->sadb_x_kmc_cookie;
3431 	}
3432 
3433 	/*
3434 	 * XXX CURRENT lifetime checks MAY BE needed for an UPDATE.
3435 	 * The spec says that one can update current lifetimes, but
3436 	 * that seems impractical, especially in the larval-to-mature
3437 	 * update that this function performs.
3438 	 */
3439 	if (soft != NULL) {
3440 		newbie->ipsa_softaddlt = soft->sadb_lifetime_addtime;
3441 		newbie->ipsa_softuselt = soft->sadb_lifetime_usetime;
3442 		newbie->ipsa_softbyteslt = soft->sadb_lifetime_bytes;
3443 		newbie->ipsa_softalloc = soft->sadb_lifetime_allocations;
3444 		SET_EXPIRE(newbie, softaddlt, softexpiretime);
3445 	}
3446 	if (hard != NULL) {
3447 		newbie->ipsa_hardaddlt = hard->sadb_lifetime_addtime;
3448 		newbie->ipsa_harduselt = hard->sadb_lifetime_usetime;
3449 		newbie->ipsa_hardbyteslt = hard->sadb_lifetime_bytes;
3450 		newbie->ipsa_hardalloc = hard->sadb_lifetime_allocations;
3451 		SET_EXPIRE(newbie, hardaddlt, hardexpiretime);
3452 	}
3453 	if (idle != NULL) {
3454 		newbie->ipsa_idleaddlt = idle->sadb_lifetime_addtime;
3455 		newbie->ipsa_idleuselt = idle->sadb_lifetime_usetime;
3456 		newbie->ipsa_idleexpiretime = newbie->ipsa_addtime +
3457 		    newbie->ipsa_idleaddlt;
3458 		newbie->ipsa_idletime = newbie->ipsa_idleaddlt;
3459 	}
3460 
3461 	newbie->ipsa_authtmpl = NULL;
3462 	newbie->ipsa_encrtmpl = NULL;
3463 
3464 	if (akey != NULL) {
3465 		newbie->ipsa_authkeybits = akey->sadb_key_bits;
3466 		newbie->ipsa_authkeylen = SADB_1TO8(akey->sadb_key_bits);
3467 		/* In case we have to round up to the next byte... */
3468 		if ((akey->sadb_key_bits & 0x7) != 0)
3469 			newbie->ipsa_authkeylen++;
3470 		newbie->ipsa_authkey = kmem_alloc(newbie->ipsa_authkeylen,
3471 		    KM_NOSLEEP);
3472 		if (newbie->ipsa_authkey == NULL) {
3473 			error = ENOMEM;
3474 			mutex_exit(&newbie->ipsa_lock);
3475 			goto error;
3476 		}
3477 		bcopy(akey + 1, newbie->ipsa_authkey, newbie->ipsa_authkeylen);
3478 		bzero(akey + 1, newbie->ipsa_authkeylen);
3479 
3480 		/*
3481 		 * Pre-initialize the kernel crypto framework key
3482 		 * structure.
3483 		 */
3484 		newbie->ipsa_kcfauthkey.ck_format = CRYPTO_KEY_RAW;
3485 		newbie->ipsa_kcfauthkey.ck_length = newbie->ipsa_authkeybits;
3486 		newbie->ipsa_kcfauthkey.ck_data = newbie->ipsa_authkey;
3487 
3488 		mutex_enter(&ipss->ipsec_alg_lock);
3489 		error = ipsec_create_ctx_tmpl(newbie, IPSEC_ALG_AUTH);
3490 		mutex_exit(&ipss->ipsec_alg_lock);
3491 		if (error != 0) {
3492 			mutex_exit(&newbie->ipsa_lock);
3493 			/*
3494 			 * An error here indicates that alg is the wrong type
3495 			 * (IE: not authentication) or its not in the alg tables
3496 			 * created by ipsecalgs(1m), or Kcf does not like the
3497 			 * parameters passed in with this algorithm, which is
3498 			 * probably a coding error!
3499 			 */
3500 			*diagnostic = SADB_X_DIAGNOSTIC_BAD_CTX;
3501 			goto error;
3502 		}
3503 	}
3504 
3505 	if (ekey != NULL) {
3506 		newbie->ipsa_encrkeybits = ekey->sadb_key_bits;
3507 		newbie->ipsa_encrkeylen = SADB_1TO8(ekey->sadb_key_bits);
3508 		/* In case we have to round up to the next byte... */
3509 		if ((ekey->sadb_key_bits & 0x7) != 0)
3510 			newbie->ipsa_encrkeylen++;
3511 		newbie->ipsa_encrkey = kmem_alloc(newbie->ipsa_encrkeylen,
3512 		    KM_NOSLEEP);
3513 		if (newbie->ipsa_encrkey == NULL) {
3514 			error = ENOMEM;
3515 			mutex_exit(&newbie->ipsa_lock);
3516 			goto error;
3517 		}
3518 		bcopy(ekey + 1, newbie->ipsa_encrkey, newbie->ipsa_encrkeylen);
3519 		/* XXX is this safe w.r.t db_ref, etc? */
3520 		bzero(ekey + 1, newbie->ipsa_encrkeylen);
3521 
3522 		/*
3523 		 * Pre-initialize the kernel crypto framework key
3524 		 * structure.
3525 		 */
3526 		newbie->ipsa_kcfencrkey.ck_format = CRYPTO_KEY_RAW;
3527 		newbie->ipsa_kcfencrkey.ck_length = newbie->ipsa_encrkeybits;
3528 		newbie->ipsa_kcfencrkey.ck_data = newbie->ipsa_encrkey;
3529 
3530 		mutex_enter(&ipss->ipsec_alg_lock);
3531 		error = ipsec_create_ctx_tmpl(newbie, IPSEC_ALG_ENCR);
3532 		mutex_exit(&ipss->ipsec_alg_lock);
3533 		if (error != 0) {
3534 			mutex_exit(&newbie->ipsa_lock);
3535 			/* See above for error explanation. */
3536 			*diagnostic = SADB_X_DIAGNOSTIC_BAD_CTX;
3537 			goto error;
3538 		}
3539 	}
3540 
3541 	sadb_init_alginfo(newbie);
3542 
3543 	/*
3544 	 * Ptrs to processing functions.
3545 	 */
3546 	if (newbie->ipsa_type == SADB_SATYPE_ESP)
3547 		ipsecesp_init_funcs(newbie);
3548 	else
3549 		ipsecah_init_funcs(newbie);
3550 	ASSERT(newbie->ipsa_output_func != NULL &&
3551 	    newbie->ipsa_input_func != NULL);
3552 
3553 	/*
3554 	 * Certificate ID stuff.
3555 	 */
3556 	if (ksi->ks_in_extv[SADB_EXT_IDENTITY_SRC] != NULL) {
3557 		sadb_ident_t *id =
3558 		    (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_SRC];
3559 
3560 		/*
3561 		 * Can assume strlen() will return okay because ext_check() in
3562 		 * keysock.c prepares the string for us.
3563 		 */
3564 		newbie->ipsa_src_cid = ipsid_lookup(id->sadb_ident_type,
3565 		    (char *)(id+1), ns);
3566 		if (newbie->ipsa_src_cid == NULL) {
3567 			error = ENOMEM;
3568 			mutex_exit(&newbie->ipsa_lock);
3569 			goto error;
3570 		}
3571 	}
3572 
3573 	if (ksi->ks_in_extv[SADB_EXT_IDENTITY_DST] != NULL) {
3574 		sadb_ident_t *id =
3575 		    (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_DST];
3576 
3577 		/*
3578 		 * Can assume strlen() will return okay because ext_check() in
3579 		 * keysock.c prepares the string for us.
3580 		 */
3581 		newbie->ipsa_dst_cid = ipsid_lookup(id->sadb_ident_type,
3582 		    (char *)(id+1), ns);
3583 		if (newbie->ipsa_dst_cid == NULL) {
3584 			error = ENOMEM;
3585 			mutex_exit(&newbie->ipsa_lock);
3586 			goto error;
3587 		}
3588 	}
3589 
3590 #if 0
3591 	/* XXXMLS  SENSITIVITY handling code. */
3592 	if (sens != NULL) {
3593 		int i;
3594 		uint64_t *bitmap = (uint64_t *)(sens + 1);
3595 
3596 		newbie->ipsa_dpd = sens->sadb_sens_dpd;
3597 		newbie->ipsa_senslevel = sens->sadb_sens_sens_level;
3598 		newbie->ipsa_integlevel = sens->sadb_sens_integ_level;
3599 		newbie->ipsa_senslen = SADB_64TO8(sens->sadb_sens_sens_len);
3600 		newbie->ipsa_integlen = SADB_64TO8(sens->sadb_sens_integ_len);
3601 		newbie->ipsa_integ = kmem_alloc(newbie->ipsa_integlen,
3602 		    KM_NOSLEEP);
3603 		if (newbie->ipsa_integ == NULL) {
3604 			error = ENOMEM;
3605 			mutex_exit(&newbie->ipsa_lock);
3606 			goto error;
3607 		}
3608 		newbie->ipsa_sens = kmem_alloc(newbie->ipsa_senslen,
3609 		    KM_NOSLEEP);
3610 		if (newbie->ipsa_sens == NULL) {
3611 			error = ENOMEM;
3612 			mutex_exit(&newbie->ipsa_lock);
3613 			goto error;
3614 		}
3615 		for (i = 0; i < sens->sadb_sens_sens_len; i++) {
3616 			newbie->ipsa_sens[i] = *bitmap;
3617 			bitmap++;
3618 		}
3619 		for (i = 0; i < sens->sadb_sens_integ_len; i++) {
3620 			newbie->ipsa_integ[i] = *bitmap;
3621 			bitmap++;
3622 		}
3623 	}
3624 
3625 #endif
3626 
3627 	if (replayext != NULL) {
3628 		if ((replayext->sadb_x_rc_replay32 == 0) &&
3629 		    (replayext->sadb_x_rc_replay64 != 0)) {
3630 			error = EOPNOTSUPP;
3631 			*diagnostic = SADB_X_DIAGNOSTIC_INVALID_REPLAY;
3632 			mutex_exit(&newbie->ipsa_lock);
3633 			goto error;
3634 		}
3635 		newbie->ipsa_replay = replayext->sadb_x_rc_replay32;
3636 	}
3637 
3638 	/* now that the SA has been updated, set its new state */
3639 	newbie->ipsa_state = assoc->sadb_sa_state;
3640 
3641 	if (clone) {
3642 		newbie->ipsa_haspeer = B_TRUE;
3643 	} else {
3644 		if (!is_inbound) {
3645 			lifetime_fuzz(newbie);
3646 		}
3647 	}
3648 	/*
3649 	 * The less locks I hold when doing an insertion and possible cloning,
3650 	 * the better!
3651 	 */
3652 	mutex_exit(&newbie->ipsa_lock);
3653 
3654 	if (clone) {
3655 		newbie_clone = sadb_cloneassoc(newbie);
3656 
3657 		if (newbie_clone == NULL) {
3658 			error = ENOMEM;
3659 			goto error;
3660 		}
3661 	}
3662 
3663 	/*
3664 	 * Enter the bucket locks.  The order of entry is outbound,
3665 	 * inbound.  We map "primary" and "secondary" into outbound and inbound
3666 	 * based on the destination address type.  If the destination address
3667 	 * type is for a node that isn't mine (or potentially mine), the
3668 	 * "primary" bucket is the outbound one.
3669 	 */
3670 	if (!is_inbound) {
3671 		/* primary == outbound */
3672 		mutex_enter(&primary->isaf_lock);
3673 		mutex_enter(&secondary->isaf_lock);
3674 	} else {
3675 		/* primary == inbound */
3676 		mutex_enter(&secondary->isaf_lock);
3677 		mutex_enter(&primary->isaf_lock);
3678 	}
3679 
3680 	IPSECHW_DEBUG(IPSECHW_SADB, ("sadb_common_add: spi = 0x%x\n",
3681 	    newbie->ipsa_spi));
3682 
3683 	/*
3684 	 * sadb_insertassoc() doesn't increment the reference
3685 	 * count.  We therefore have to increment the
3686 	 * reference count one more time to reflect the
3687 	 * pointers of the table that reference this SA.
3688 	 */
3689 	IPSA_REFHOLD(newbie);
3690 
3691 	if (isupdate) {
3692 		/*
3693 		 * Unlink from larval holding cell in the "inbound" fanout.
3694 		 */
3695 		ASSERT(newbie->ipsa_linklock == &primary->isaf_lock ||
3696 		    newbie->ipsa_linklock == &secondary->isaf_lock);
3697 		sadb_unlinkassoc(newbie);
3698 	}
3699 
3700 	mutex_enter(&newbie->ipsa_lock);
3701 	error = sadb_insertassoc(newbie, primary);
3702 	if (error == 0) {
3703 		ctl_mp = sadb_fmt_sa_req(DL_CO_SET, newbie->ipsa_type, newbie,
3704 		    is_inbound);
3705 	}
3706 	mutex_exit(&newbie->ipsa_lock);
3707 
3708 	if (error != 0) {
3709 		/*
3710 		 * Since sadb_insertassoc() failed, we must decrement the
3711 		 * refcount again so the cleanup code will actually free
3712 		 * the offending SA.
3713 		 */
3714 		IPSA_REFRELE(newbie);
3715 		goto error_unlock;
3716 	}
3717 
3718 	if (newbie_clone != NULL) {
3719 		mutex_enter(&newbie_clone->ipsa_lock);
3720 		error = sadb_insertassoc(newbie_clone, secondary);
3721 		mutex_exit(&newbie_clone->ipsa_lock);
3722 		if (error != 0) {
3723 			/* Collision in secondary table. */
3724 			sadb_unlinkassoc(newbie);  /* This does REFRELE. */
3725 			goto error_unlock;
3726 		}
3727 		IPSA_REFHOLD(newbie_clone);
3728 	} else {
3729 		ASSERT(primary != secondary);
3730 		scratch = ipsec_getassocbyspi(secondary, newbie->ipsa_spi,
3731 		    ALL_ZEROES_PTR, newbie->ipsa_dstaddr, af);
3732 		if (scratch != NULL) {
3733 			/* Collision in secondary table. */
3734 			sadb_unlinkassoc(newbie);  /* This does REFRELE. */
3735 			/* Set the error, since ipsec_getassocbyspi() can't. */
3736 			error = EEXIST;
3737 			goto error_unlock;
3738 		}
3739 	}
3740 
3741 	/* OKAY!  So let's do some reality check assertions. */
3742 
3743 	ASSERT(MUTEX_NOT_HELD(&newbie->ipsa_lock));
3744 	ASSERT(newbie_clone == NULL ||
3745 	    (MUTEX_NOT_HELD(&newbie_clone->ipsa_lock)));
3746 	/*
3747 	 * If hardware acceleration could happen, send it.
3748 	 */
3749 	if (ctl_mp != NULL) {
3750 		putnext(ip_q, ctl_mp);
3751 		ctl_mp = NULL;
3752 	}
3753 
3754 error_unlock:
3755 
3756 	/*
3757 	 * We can exit the locks in any order.	Only entrance needs to
3758 	 * follow any protocol.
3759 	 */
3760 	mutex_exit(&secondary->isaf_lock);
3761 	mutex_exit(&primary->isaf_lock);
3762 
3763 	if (pair_ext != NULL && error == 0) {
3764 		/* update pair_spi if it exists. */
3765 		ipsapp = get_ipsa_pair(assoc, srcext, dstext, spp);
3766 		if (ipsapp == NULL) {
3767 			error = ESRCH;
3768 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_SA_NOTFOUND;
3769 		} else if (ipsapp->ipsap_psa_ptr != NULL) {
3770 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_ALREADY;
3771 			error = EINVAL;
3772 		} else {
3773 			/* update_pairing() sets diagnostic */
3774 			error = update_pairing(ipsapp, ksi, diagnostic, spp);
3775 		}
3776 	}
3777 	/* Common error point for this routine. */
3778 error:
3779 	if (newbie != NULL) {
3780 		if (error != 0) {
3781 			/* This SA is broken, let the reaper clean up. */
3782 			mutex_enter(&newbie->ipsa_lock);
3783 			newbie->ipsa_state = IPSA_STATE_DEAD;
3784 			newbie->ipsa_hardexpiretime = 1;
3785 			mutex_exit(&newbie->ipsa_lock);
3786 		}
3787 		IPSA_REFRELE(newbie);
3788 	}
3789 	if (newbie_clone != NULL) {
3790 		IPSA_REFRELE(newbie_clone);
3791 	}
3792 	if (ctl_mp != NULL)
3793 		freemsg(ctl_mp);
3794 
3795 	if (error == 0) {
3796 		/*
3797 		 * Construct favorable PF_KEY return message and send to
3798 		 * keysock. Update the flags in the original keysock message
3799 		 * to reflect the actual flags in the new SA.
3800 		 *  (Q:  Do I need to pass "newbie"?  If I do,
3801 		 * make sure to REFHOLD, call, then REFRELE.)
3802 		 */
3803 		assoc->sadb_sa_flags = newbie->ipsa_flags;
3804 		sadb_pfkey_echo(pfkey_q, mp, samsg, ksi, NULL);
3805 	}
3806 
3807 	destroy_ipsa_pair(ipsapp);
3808 	return (error);
3809 }
3810 
3811 /*
3812  * Set the time of first use for a security association.  Update any
3813  * expiration times as a result.
3814  */
3815 void
3816 sadb_set_usetime(ipsa_t *assoc)
3817 {
3818 	time_t snapshot = gethrestime_sec();
3819 
3820 	mutex_enter(&assoc->ipsa_lock);
3821 	assoc->ipsa_lastuse = snapshot;
3822 	assoc->ipsa_idleexpiretime = snapshot + assoc->ipsa_idletime;
3823 
3824 	/*
3825 	 * Caller does check usetime before calling me usually, and
3826 	 * double-checking is better than a mutex_enter/exit hit.
3827 	 */
3828 	if (assoc->ipsa_usetime == 0) {
3829 		/*
3830 		 * This is redundant for outbound SA's, as
3831 		 * ipsec_getassocbyconn() sets the IPSA_F_USED flag already.
3832 		 * Inbound SAs, however, have no such protection.
3833 		 */
3834 		assoc->ipsa_flags |= IPSA_F_USED;
3835 		assoc->ipsa_usetime = snapshot;
3836 
3837 		/*
3838 		 * After setting the use time, see if we have a use lifetime
3839 		 * that would cause the actual SA expiration time to shorten.
3840 		 */
3841 		UPDATE_EXPIRE(assoc, softuselt, softexpiretime);
3842 		UPDATE_EXPIRE(assoc, harduselt, hardexpiretime);
3843 	}
3844 	mutex_exit(&assoc->ipsa_lock);
3845 }
3846 
3847 /*
3848  * Send up a PF_KEY expire message for this association.
3849  */
3850 static void
3851 sadb_expire_assoc(queue_t *pfkey_q, ipsa_t *assoc)
3852 {
3853 	mblk_t *mp, *mp1;
3854 	int alloclen, af;
3855 	sadb_msg_t *samsg;
3856 	sadb_lifetime_t *current, *expire;
3857 	sadb_sa_t *saext;
3858 	uint8_t *end;
3859 	boolean_t tunnel_mode;
3860 
3861 	ASSERT(MUTEX_HELD(&assoc->ipsa_lock));
3862 
3863 	/* Don't bother sending if there's no queue. */
3864 	if (pfkey_q == NULL)
3865 		return;
3866 
3867 	/* If the SA is one of a pair, only SOFT expire the OUTBOUND SA */
3868 	if (assoc->ipsa_state == IPSA_STATE_DYING &&
3869 	    (assoc->ipsa_flags & IPSA_F_PAIRED) &&
3870 	    !(assoc->ipsa_flags & IPSA_F_OUTBOUND)) {
3871 		return;
3872 	}
3873 
3874 	mp = sadb_keysock_out(0);
3875 	if (mp == NULL) {
3876 		/* cmn_err(CE_WARN, */
3877 		/*	"sadb_expire_assoc: Can't allocate KEYSOCK_OUT.\n"); */
3878 		return;
3879 	}
3880 
3881 	alloclen = sizeof (*samsg) + sizeof (*current) + sizeof (*expire) +
3882 	    2 * sizeof (sadb_address_t) + sizeof (*saext);
3883 
3884 	af = assoc->ipsa_addrfam;
3885 	switch (af) {
3886 	case AF_INET:
3887 		alloclen += 2 * sizeof (struct sockaddr_in);
3888 		break;
3889 	case AF_INET6:
3890 		alloclen += 2 * sizeof (struct sockaddr_in6);
3891 		break;
3892 	default:
3893 		/* Won't happen unless there's a kernel bug. */
3894 		freeb(mp);
3895 		cmn_err(CE_WARN,
3896 		    "sadb_expire_assoc: Unknown address length.\n");
3897 		return;
3898 	}
3899 
3900 	tunnel_mode = (assoc->ipsa_flags & IPSA_F_TUNNEL);
3901 	if (tunnel_mode) {
3902 		alloclen += 2 * sizeof (sadb_address_t);
3903 		switch (assoc->ipsa_innerfam) {
3904 		case AF_INET:
3905 			alloclen += 2 * sizeof (struct sockaddr_in);
3906 			break;
3907 		case AF_INET6:
3908 			alloclen += 2 * sizeof (struct sockaddr_in6);
3909 			break;
3910 		default:
3911 			/* Won't happen unless there's a kernel bug. */
3912 			freeb(mp);
3913 			cmn_err(CE_WARN, "sadb_expire_assoc: "
3914 			    "Unknown inner address length.\n");
3915 			return;
3916 		}
3917 	}
3918 
3919 	mp->b_cont = allocb(alloclen, BPRI_HI);
3920 	if (mp->b_cont == NULL) {
3921 		freeb(mp);
3922 		/* cmn_err(CE_WARN, */
3923 		/*	"sadb_expire_assoc: Can't allocate message.\n"); */
3924 		return;
3925 	}
3926 
3927 	mp1 = mp;
3928 	mp = mp->b_cont;
3929 	end = mp->b_wptr + alloclen;
3930 
3931 	samsg = (sadb_msg_t *)mp->b_wptr;
3932 	mp->b_wptr += sizeof (*samsg);
3933 	samsg->sadb_msg_version = PF_KEY_V2;
3934 	samsg->sadb_msg_type = SADB_EXPIRE;
3935 	samsg->sadb_msg_errno = 0;
3936 	samsg->sadb_msg_satype = assoc->ipsa_type;
3937 	samsg->sadb_msg_len = SADB_8TO64(alloclen);
3938 	samsg->sadb_msg_reserved = 0;
3939 	samsg->sadb_msg_seq = 0;
3940 	samsg->sadb_msg_pid = 0;
3941 
3942 	saext = (sadb_sa_t *)mp->b_wptr;
3943 	mp->b_wptr += sizeof (*saext);
3944 	saext->sadb_sa_len = SADB_8TO64(sizeof (*saext));
3945 	saext->sadb_sa_exttype = SADB_EXT_SA;
3946 	saext->sadb_sa_spi = assoc->ipsa_spi;
3947 	saext->sadb_sa_replay = assoc->ipsa_replay_wsize;
3948 	saext->sadb_sa_state = assoc->ipsa_state;
3949 	saext->sadb_sa_auth = assoc->ipsa_auth_alg;
3950 	saext->sadb_sa_encrypt = assoc->ipsa_encr_alg;
3951 	saext->sadb_sa_flags = assoc->ipsa_flags;
3952 
3953 	current = (sadb_lifetime_t *)mp->b_wptr;
3954 	mp->b_wptr += sizeof (sadb_lifetime_t);
3955 	current->sadb_lifetime_len = SADB_8TO64(sizeof (*current));
3956 	current->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT;
3957 	/* We do not support the concept. */
3958 	current->sadb_lifetime_allocations = 0;
3959 	current->sadb_lifetime_bytes = assoc->ipsa_bytes;
3960 	current->sadb_lifetime_addtime = assoc->ipsa_addtime;
3961 	current->sadb_lifetime_usetime = assoc->ipsa_usetime;
3962 
3963 	expire = (sadb_lifetime_t *)mp->b_wptr;
3964 	mp->b_wptr += sizeof (*expire);
3965 	expire->sadb_lifetime_len = SADB_8TO64(sizeof (*expire));
3966 
3967 	if (assoc->ipsa_state == IPSA_STATE_DEAD) {
3968 		expire->sadb_lifetime_exttype = SADB_EXT_LIFETIME_HARD;
3969 		expire->sadb_lifetime_allocations = assoc->ipsa_hardalloc;
3970 		expire->sadb_lifetime_bytes = assoc->ipsa_hardbyteslt;
3971 		expire->sadb_lifetime_addtime = assoc->ipsa_hardaddlt;
3972 		expire->sadb_lifetime_usetime = assoc->ipsa_harduselt;
3973 	} else if (assoc->ipsa_state == IPSA_STATE_DYING) {
3974 		expire->sadb_lifetime_exttype = SADB_EXT_LIFETIME_SOFT;
3975 		expire->sadb_lifetime_allocations = assoc->ipsa_softalloc;
3976 		expire->sadb_lifetime_bytes = assoc->ipsa_softbyteslt;
3977 		expire->sadb_lifetime_addtime = assoc->ipsa_softaddlt;
3978 		expire->sadb_lifetime_usetime = assoc->ipsa_softuselt;
3979 	} else {
3980 		ASSERT(assoc->ipsa_state == IPSA_STATE_MATURE);
3981 		expire->sadb_lifetime_exttype = SADB_X_EXT_LIFETIME_IDLE;
3982 		expire->sadb_lifetime_allocations = 0;
3983 		expire->sadb_lifetime_bytes = 0;
3984 		expire->sadb_lifetime_addtime = assoc->ipsa_idleaddlt;
3985 		expire->sadb_lifetime_usetime = assoc->ipsa_idleuselt;
3986 	}
3987 
3988 	mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end, SADB_EXT_ADDRESS_SRC,
3989 	    af, assoc->ipsa_srcaddr, tunnel_mode ? 0 : SA_SRCPORT(assoc),
3990 	    SA_PROTO(assoc), 0);
3991 	ASSERT(mp->b_wptr != NULL);
3992 
3993 	mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end, SADB_EXT_ADDRESS_DST,
3994 	    af, assoc->ipsa_dstaddr, tunnel_mode ? 0 : SA_DSTPORT(assoc),
3995 	    SA_PROTO(assoc), 0);
3996 	ASSERT(mp->b_wptr != NULL);
3997 
3998 	if (tunnel_mode) {
3999 		mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end,
4000 		    SADB_X_EXT_ADDRESS_INNER_SRC, assoc->ipsa_innerfam,
4001 		    assoc->ipsa_innersrc, SA_SRCPORT(assoc), SA_IPROTO(assoc),
4002 		    assoc->ipsa_innersrcpfx);
4003 		ASSERT(mp->b_wptr != NULL);
4004 		mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end,
4005 		    SADB_X_EXT_ADDRESS_INNER_DST, assoc->ipsa_innerfam,
4006 		    assoc->ipsa_innerdst, SA_DSTPORT(assoc), SA_IPROTO(assoc),
4007 		    assoc->ipsa_innerdstpfx);
4008 		ASSERT(mp->b_wptr != NULL);
4009 	}
4010 
4011 	/* Can just putnext, we're ready to go! */
4012 	putnext(pfkey_q, mp1);
4013 }
4014 
4015 /*
4016  * "Age" the SA with the number of bytes that was used to protect traffic.
4017  * Send an SADB_EXPIRE message if appropriate.	Return B_TRUE if there was
4018  * enough "charge" left in the SA to protect the data.	Return B_FALSE
4019  * otherwise.  (If B_FALSE is returned, the association either was, or became
4020  * DEAD.)
4021  */
4022 boolean_t
4023 sadb_age_bytes(queue_t *pfkey_q, ipsa_t *assoc, uint64_t bytes,
4024     boolean_t sendmsg)
4025 {
4026 	boolean_t rc = B_TRUE;
4027 	uint64_t newtotal;
4028 
4029 	mutex_enter(&assoc->ipsa_lock);
4030 	newtotal = assoc->ipsa_bytes + bytes;
4031 	if (assoc->ipsa_hardbyteslt != 0 &&
4032 	    newtotal >= assoc->ipsa_hardbyteslt) {
4033 		if (assoc->ipsa_state != IPSA_STATE_DEAD) {
4034 			sadb_delete_cluster(assoc);
4035 			/*
4036 			 * Send EXPIRE message to PF_KEY.  May wish to pawn
4037 			 * this off on another non-interrupt thread.  Also
4038 			 * unlink this SA immediately.
4039 			 */
4040 			assoc->ipsa_state = IPSA_STATE_DEAD;
4041 			if (sendmsg)
4042 				sadb_expire_assoc(pfkey_q, assoc);
4043 			/*
4044 			 * Set non-zero expiration time so sadb_age_assoc()
4045 			 * will work when reaping.
4046 			 */
4047 			assoc->ipsa_hardexpiretime = (time_t)1;
4048 		} /* Else someone beat me to it! */
4049 		rc = B_FALSE;
4050 	} else if (assoc->ipsa_softbyteslt != 0 &&
4051 	    (newtotal >= assoc->ipsa_softbyteslt)) {
4052 		if (assoc->ipsa_state < IPSA_STATE_DYING) {
4053 			/*
4054 			 * Send EXPIRE message to PF_KEY.  May wish to pawn
4055 			 * this off on another non-interrupt thread.
4056 			 */
4057 			assoc->ipsa_state = IPSA_STATE_DYING;
4058 			assoc->ipsa_bytes = newtotal;
4059 			if (sendmsg)
4060 				sadb_expire_assoc(pfkey_q, assoc);
4061 		} /* Else someone beat me to it! */
4062 	}
4063 	if (rc == B_TRUE)
4064 		assoc->ipsa_bytes = newtotal;
4065 	mutex_exit(&assoc->ipsa_lock);
4066 	return (rc);
4067 }
4068 
4069 /*
4070  * Push one or more DL_CO_DELETE messages queued up by
4071  * sadb_torch_assoc down to the underlying driver now that it's a
4072  * convenient time for it (i.e., ipsa bucket locks not held).
4073  */
4074 static void
4075 sadb_drain_torchq(queue_t *q, mblk_t *mp)
4076 {
4077 	while (mp != NULL) {
4078 		mblk_t *next = mp->b_next;
4079 		mp->b_next = NULL;
4080 		if (q != NULL)
4081 			putnext(q, mp);
4082 		else
4083 			freemsg(mp);
4084 		mp = next;
4085 	}
4086 }
4087 
4088 /*
4089  * "Torch" an individual SA.  Returns NULL, so it can be tail-called from
4090  *     sadb_age_assoc().
4091  *
4092  * If SA is hardware-accelerated, and we can't allocate the mblk
4093  * containing the DL_CO_DELETE, just return; it will remain in the
4094  * table and be swept up by sadb_ager() in a subsequent pass.
4095  */
4096 static ipsa_t *
4097 sadb_torch_assoc(isaf_t *head, ipsa_t *sa, boolean_t inbnd, mblk_t **mq)
4098 {
4099 	mblk_t *mp;
4100 
4101 	ASSERT(MUTEX_HELD(&head->isaf_lock));
4102 	ASSERT(MUTEX_HELD(&sa->ipsa_lock));
4103 	ASSERT(sa->ipsa_state == IPSA_STATE_DEAD);
4104 
4105 	/*
4106 	 * Force cached SAs to be revalidated..
4107 	 */
4108 	head->isaf_gen++;
4109 
4110 	if (sa->ipsa_flags & IPSA_F_HW) {
4111 		mp = sadb_fmt_sa_req(DL_CO_DELETE, sa->ipsa_type, sa, inbnd);
4112 		if (mp == NULL) {
4113 			mutex_exit(&sa->ipsa_lock);
4114 			return (NULL);
4115 		}
4116 		mp->b_next = *mq;
4117 		*mq = mp;
4118 	}
4119 	mutex_exit(&sa->ipsa_lock);
4120 	sadb_unlinkassoc(sa);
4121 
4122 	return (NULL);
4123 }
4124 
4125 /*
4126  * Do various SA-is-idle activities depending on delta (the number of idle
4127  * seconds on the SA) and/or other properties of the SA.
4128  *
4129  * Return B_TRUE if I've sent a packet, because I have to drop the
4130  * association's mutex before sending a packet out the wire.
4131  */
4132 /* ARGSUSED */
4133 static boolean_t
4134 sadb_idle_activities(ipsa_t *assoc, time_t delta, boolean_t inbound)
4135 {
4136 	ipsecesp_stack_t *espstack = assoc->ipsa_netstack->netstack_ipsecesp;
4137 	int nat_t_interval = espstack->ipsecesp_nat_keepalive_interval;
4138 
4139 	ASSERT(MUTEX_HELD(&assoc->ipsa_lock));
4140 
4141 	if (!inbound && (assoc->ipsa_flags & IPSA_F_NATT_LOC) &&
4142 	    delta >= nat_t_interval &&
4143 	    gethrestime_sec() - assoc->ipsa_last_nat_t_ka >= nat_t_interval) {
4144 		ASSERT(assoc->ipsa_type == SADB_SATYPE_ESP);
4145 		assoc->ipsa_last_nat_t_ka = gethrestime_sec();
4146 		mutex_exit(&assoc->ipsa_lock);
4147 		ipsecesp_send_keepalive(assoc);
4148 		return (B_TRUE);
4149 	}
4150 	return (B_FALSE);
4151 }
4152 
4153 /*
4154  * Return "assoc" if haspeer is true and I send an expire.  This allows
4155  * the consumers' aging functions to tidy up an expired SA's peer.
4156  */
4157 static ipsa_t *
4158 sadb_age_assoc(isaf_t *head, queue_t *pfkey_q, ipsa_t *assoc,
4159     time_t current, int reap_delay, boolean_t inbound, mblk_t **mq)
4160 {
4161 	ipsa_t *retval = NULL;
4162 	boolean_t dropped_mutex = B_FALSE;
4163 
4164 	ASSERT(MUTEX_HELD(&head->isaf_lock));
4165 
4166 	mutex_enter(&assoc->ipsa_lock);
4167 
4168 	if (((assoc->ipsa_state == IPSA_STATE_LARVAL) ||
4169 	    ((assoc->ipsa_state == IPSA_STATE_IDLE) ||
4170 	    (assoc->ipsa_state == IPSA_STATE_ACTIVE_ELSEWHERE) &&
4171 	    (assoc->ipsa_hardexpiretime != 0))) &&
4172 	    (assoc->ipsa_hardexpiretime <= current)) {
4173 		assoc->ipsa_state = IPSA_STATE_DEAD;
4174 		return (sadb_torch_assoc(head, assoc, inbound, mq));
4175 	}
4176 
4177 	/*
4178 	 * Check lifetimes.  Fortunately, SA setup is done
4179 	 * such that there are only two times to look at,
4180 	 * softexpiretime, and hardexpiretime.
4181 	 *
4182 	 * Check hard first.
4183 	 */
4184 
4185 	if (assoc->ipsa_hardexpiretime != 0 &&
4186 	    assoc->ipsa_hardexpiretime <= current) {
4187 		if (assoc->ipsa_state == IPSA_STATE_DEAD)
4188 			return (sadb_torch_assoc(head, assoc, inbound, mq));
4189 
4190 		if (inbound) {
4191 			sadb_delete_cluster(assoc);
4192 		}
4193 
4194 		/*
4195 		 * Send SADB_EXPIRE with hard lifetime, delay for unlinking.
4196 		 */
4197 		assoc->ipsa_state = IPSA_STATE_DEAD;
4198 		if (assoc->ipsa_haspeer || assoc->ipsa_otherspi != 0) {
4199 			/*
4200 			 * If the SA is paired or peered with another, put
4201 			 * a copy on a list which can be processed later, the
4202 			 * pair/peer SA needs to be updated so the both die
4203 			 * at the same time.
4204 			 *
4205 			 * If I return assoc, I have to bump up its reference
4206 			 * count to keep with the ipsa_t reference count
4207 			 * semantics.
4208 			 */
4209 			IPSA_REFHOLD(assoc);
4210 			retval = assoc;
4211 		}
4212 		sadb_expire_assoc(pfkey_q, assoc);
4213 		assoc->ipsa_hardexpiretime = current + reap_delay;
4214 	} else if (assoc->ipsa_softexpiretime != 0 &&
4215 	    assoc->ipsa_softexpiretime <= current &&
4216 	    assoc->ipsa_state < IPSA_STATE_DYING) {
4217 		/*
4218 		 * Send EXPIRE message to PF_KEY.  May wish to pawn
4219 		 * this off on another non-interrupt thread.
4220 		 */
4221 		assoc->ipsa_state = IPSA_STATE_DYING;
4222 		if (assoc->ipsa_haspeer) {
4223 			/*
4224 			 * If the SA has a peer, update the peer's state
4225 			 * on SOFT_EXPIRE, this is mostly to prevent two
4226 			 * expire messages from effectively the same SA.
4227 			 *
4228 			 * Don't care about paired SA's, then can (and should)
4229 			 * be able to soft expire at different times.
4230 			 *
4231 			 * If I return assoc, I have to bump up its
4232 			 * reference count to keep with the ipsa_t reference
4233 			 * count semantics.
4234 			 */
4235 			IPSA_REFHOLD(assoc);
4236 			retval = assoc;
4237 		}
4238 		sadb_expire_assoc(pfkey_q, assoc);
4239 	} else if (assoc->ipsa_idletime != 0 &&
4240 	    assoc->ipsa_idleexpiretime <= current) {
4241 		if (assoc->ipsa_state == IPSA_STATE_ACTIVE_ELSEWHERE) {
4242 			assoc->ipsa_state = IPSA_STATE_IDLE;
4243 		}
4244 
4245 		/*
4246 		 * Need to handle Mature case
4247 		 */
4248 		if (assoc->ipsa_state == IPSA_STATE_MATURE) {
4249 			sadb_expire_assoc(pfkey_q, assoc);
4250 		}
4251 	} else {
4252 		/* Check idle time activities. */
4253 		dropped_mutex = sadb_idle_activities(assoc,
4254 		    current - assoc->ipsa_lastuse, inbound);
4255 	}
4256 
4257 	if (!dropped_mutex)
4258 		mutex_exit(&assoc->ipsa_lock);
4259 	return (retval);
4260 }
4261 
4262 /*
4263  * Called by a consumer protocol to do ther dirty work of reaping dead
4264  * Security Associations.
4265  *
4266  * NOTE: sadb_age_assoc() marks expired SA's as DEAD but only removed
4267  * SA's that are already marked DEAD, so expired SA's are only reaped
4268  * the second time sadb_ager() runs.
4269  */
4270 void
4271 sadb_ager(sadb_t *sp, queue_t *pfkey_q, queue_t *ip_q, int reap_delay,
4272     netstack_t *ns)
4273 {
4274 	int i;
4275 	isaf_t *bucket;
4276 	ipsa_t *assoc, *spare;
4277 	iacqf_t *acqlist;
4278 	ipsacq_t *acqrec, *spareacq;
4279 	templist_t *haspeerlist, *newbie;
4280 	/* Snapshot current time now. */
4281 	time_t current = gethrestime_sec();
4282 	mblk_t *mq = NULL;
4283 	haspeerlist = NULL;
4284 
4285 	/*
4286 	 * Do my dirty work.  This includes aging real entries, aging
4287 	 * larvals, and aging outstanding ACQUIREs.
4288 	 *
4289 	 * I hope I don't tie up resources for too long.
4290 	 */
4291 
4292 	/* Age acquires. */
4293 
4294 	for (i = 0; i < sp->sdb_hashsize; i++) {
4295 		acqlist = &sp->sdb_acq[i];
4296 		mutex_enter(&acqlist->iacqf_lock);
4297 		for (acqrec = acqlist->iacqf_ipsacq; acqrec != NULL;
4298 		    acqrec = spareacq) {
4299 			spareacq = acqrec->ipsacq_next;
4300 			if (current > acqrec->ipsacq_expire)
4301 				sadb_destroy_acquire(acqrec, ns);
4302 		}
4303 		mutex_exit(&acqlist->iacqf_lock);
4304 	}
4305 
4306 	/* Age inbound associations. */
4307 	for (i = 0; i < sp->sdb_hashsize; i++) {
4308 		bucket = &(sp->sdb_if[i]);
4309 		mutex_enter(&bucket->isaf_lock);
4310 		for (assoc = bucket->isaf_ipsa; assoc != NULL;
4311 		    assoc = spare) {
4312 			spare = assoc->ipsa_next;
4313 			if (sadb_age_assoc(bucket, pfkey_q, assoc, current,
4314 			    reap_delay, B_TRUE, &mq) != NULL) {
4315 				/*
4316 				 * Put SA's which have a peer or SA's which
4317 				 * are paired on a list for processing after
4318 				 * all the hash tables have been walked.
4319 				 *
4320 				 * sadb_age_assoc() increments the refcnt,
4321 				 * effectively doing an IPSA_REFHOLD().
4322 				 */
4323 				newbie = kmem_alloc(sizeof (*newbie),
4324 				    KM_NOSLEEP);
4325 				if (newbie == NULL) {
4326 					/*
4327 					 * Don't forget to REFRELE().
4328 					 */
4329 					IPSA_REFRELE(assoc);
4330 					continue;	/* for loop... */
4331 				}
4332 				newbie->next = haspeerlist;
4333 				newbie->ipsa = assoc;
4334 				haspeerlist = newbie;
4335 			}
4336 		}
4337 		mutex_exit(&bucket->isaf_lock);
4338 	}
4339 
4340 	if (mq != NULL) {
4341 		sadb_drain_torchq(ip_q, mq);
4342 		mq = NULL;
4343 	}
4344 	age_pair_peer_list(haspeerlist, sp, B_FALSE);
4345 	haspeerlist = NULL;
4346 
4347 	/* Age outbound associations. */
4348 	for (i = 0; i < sp->sdb_hashsize; i++) {
4349 		bucket = &(sp->sdb_of[i]);
4350 		mutex_enter(&bucket->isaf_lock);
4351 		for (assoc = bucket->isaf_ipsa; assoc != NULL;
4352 		    assoc = spare) {
4353 			spare = assoc->ipsa_next;
4354 			if (sadb_age_assoc(bucket, pfkey_q, assoc, current,
4355 			    reap_delay, B_FALSE, &mq) != NULL) {
4356 				/*
4357 				 * sadb_age_assoc() increments the refcnt,
4358 				 * effectively doing an IPSA_REFHOLD().
4359 				 */
4360 				newbie = kmem_alloc(sizeof (*newbie),
4361 				    KM_NOSLEEP);
4362 				if (newbie == NULL) {
4363 					/*
4364 					 * Don't forget to REFRELE().
4365 					 */
4366 					IPSA_REFRELE(assoc);
4367 					continue;	/* for loop... */
4368 				}
4369 				newbie->next = haspeerlist;
4370 				newbie->ipsa = assoc;
4371 				haspeerlist = newbie;
4372 			}
4373 		}
4374 		mutex_exit(&bucket->isaf_lock);
4375 	}
4376 	if (mq != NULL) {
4377 		sadb_drain_torchq(ip_q, mq);
4378 		mq = NULL;
4379 	}
4380 
4381 	age_pair_peer_list(haspeerlist, sp, B_TRUE);
4382 
4383 	/*
4384 	 * Run a GC pass to clean out dead identities.
4385 	 */
4386 	ipsid_gc(ns);
4387 }
4388 
4389 /*
4390  * Figure out when to reschedule the ager.
4391  */
4392 timeout_id_t
4393 sadb_retimeout(hrtime_t begin, queue_t *pfkey_q, void (*ager)(void *),
4394     void *agerarg, uint_t *intp, uint_t intmax, short mid)
4395 {
4396 	hrtime_t end = gethrtime();
4397 	uint_t interval = *intp;
4398 
4399 	/*
4400 	 * See how long this took.  If it took too long, increase the
4401 	 * aging interval.
4402 	 */
4403 	if ((end - begin) > (hrtime_t)interval * (hrtime_t)1000000) {
4404 		if (interval >= intmax) {
4405 			/* XXX Rate limit this?  Or recommend flush? */
4406 			(void) strlog(mid, 0, 0, SL_ERROR | SL_WARN,
4407 			    "Too many SA's to age out in %d msec.\n",
4408 			    intmax);
4409 		} else {
4410 			/* Double by shifting by one bit. */
4411 			interval <<= 1;
4412 			interval = min(interval, intmax);
4413 		}
4414 	} else if ((end - begin) <= (hrtime_t)interval * (hrtime_t)500000 &&
4415 	    interval > SADB_AGE_INTERVAL_DEFAULT) {
4416 		/*
4417 		 * If I took less than half of the interval, then I should
4418 		 * ratchet the interval back down.  Never automatically
4419 		 * shift below the default aging interval.
4420 		 *
4421 		 * NOTE:This even overrides manual setting of the age
4422 		 *	interval using NDD to lower the setting past the
4423 		 *	default.  In other words, if you set the interval
4424 		 *	lower than the default, and your SADB gets too big,
4425 		 *	the interval will only self-lower back to the default.
4426 		 */
4427 		/* Halve by shifting one bit. */
4428 		interval >>= 1;
4429 		interval = max(interval, SADB_AGE_INTERVAL_DEFAULT);
4430 	}
4431 	*intp = interval;
4432 	return (qtimeout(pfkey_q, ager, agerarg,
4433 	    drv_usectohz(interval * 1000)));
4434 }
4435 
4436 
4437 /*
4438  * Update the lifetime values of an SA.	 This is the path an SADB_UPDATE
4439  * message takes when updating a MATURE or DYING SA.
4440  */
4441 static void
4442 sadb_update_lifetimes(ipsa_t *assoc, sadb_lifetime_t *hard,
4443     sadb_lifetime_t *soft, sadb_lifetime_t *idle, boolean_t outbound)
4444 {
4445 	mutex_enter(&assoc->ipsa_lock);
4446 
4447 	/*
4448 	 * XXX RFC 2367 mentions how an SADB_EXT_LIFETIME_CURRENT can be
4449 	 * passed in during an update message.	We currently don't handle
4450 	 * these.
4451 	 */
4452 
4453 	if (hard != NULL) {
4454 		if (hard->sadb_lifetime_bytes != 0)
4455 			assoc->ipsa_hardbyteslt = hard->sadb_lifetime_bytes;
4456 		if (hard->sadb_lifetime_usetime != 0)
4457 			assoc->ipsa_harduselt = hard->sadb_lifetime_usetime;
4458 		if (hard->sadb_lifetime_addtime != 0)
4459 			assoc->ipsa_hardaddlt = hard->sadb_lifetime_addtime;
4460 		if (assoc->ipsa_hardaddlt != 0) {
4461 			assoc->ipsa_hardexpiretime =
4462 			    assoc->ipsa_addtime + assoc->ipsa_hardaddlt;
4463 		}
4464 		if (assoc->ipsa_harduselt != 0 &&
4465 		    assoc->ipsa_flags & IPSA_F_USED) {
4466 			UPDATE_EXPIRE(assoc, harduselt, hardexpiretime);
4467 		}
4468 		if (hard->sadb_lifetime_allocations != 0)
4469 			assoc->ipsa_hardalloc = hard->sadb_lifetime_allocations;
4470 	}
4471 
4472 	if (soft != NULL) {
4473 		if (soft->sadb_lifetime_bytes != 0) {
4474 			if (soft->sadb_lifetime_bytes >
4475 			    assoc->ipsa_hardbyteslt) {
4476 				assoc->ipsa_softbyteslt =
4477 				    assoc->ipsa_hardbyteslt;
4478 			} else {
4479 				assoc->ipsa_softbyteslt =
4480 				    soft->sadb_lifetime_bytes;
4481 			}
4482 		}
4483 		if (soft->sadb_lifetime_usetime != 0) {
4484 			if (soft->sadb_lifetime_usetime >
4485 			    assoc->ipsa_harduselt) {
4486 				assoc->ipsa_softuselt =
4487 				    assoc->ipsa_harduselt;
4488 			} else {
4489 				assoc->ipsa_softuselt =
4490 				    soft->sadb_lifetime_usetime;
4491 			}
4492 		}
4493 		if (soft->sadb_lifetime_addtime != 0) {
4494 			if (soft->sadb_lifetime_addtime >
4495 			    assoc->ipsa_hardexpiretime) {
4496 				assoc->ipsa_softexpiretime =
4497 				    assoc->ipsa_hardexpiretime;
4498 			} else {
4499 				assoc->ipsa_softaddlt =
4500 				    soft->sadb_lifetime_addtime;
4501 			}
4502 		}
4503 		if (assoc->ipsa_softaddlt != 0) {
4504 			assoc->ipsa_softexpiretime =
4505 			    assoc->ipsa_addtime + assoc->ipsa_softaddlt;
4506 		}
4507 		if (assoc->ipsa_softuselt != 0 &&
4508 		    assoc->ipsa_flags & IPSA_F_USED) {
4509 			UPDATE_EXPIRE(assoc, softuselt, softexpiretime);
4510 		}
4511 		if (outbound && assoc->ipsa_softexpiretime != 0) {
4512 			if (assoc->ipsa_state == IPSA_STATE_MATURE)
4513 				lifetime_fuzz(assoc);
4514 		}
4515 
4516 		if (soft->sadb_lifetime_allocations != 0)
4517 			assoc->ipsa_softalloc = soft->sadb_lifetime_allocations;
4518 	}
4519 
4520 	if (idle != NULL) {
4521 		time_t current = gethrestime_sec();
4522 		if ((assoc->ipsa_idleexpiretime <= current) &&
4523 		    (assoc->ipsa_idleaddlt == idle->sadb_lifetime_addtime)) {
4524 			assoc->ipsa_idleexpiretime =
4525 			    current + assoc->ipsa_idleaddlt;
4526 		}
4527 		if (idle->sadb_lifetime_addtime != 0)
4528 			assoc->ipsa_idleaddlt = idle->sadb_lifetime_addtime;
4529 		if (idle->sadb_lifetime_usetime != 0)
4530 			assoc->ipsa_idleuselt = idle->sadb_lifetime_usetime;
4531 		if (assoc->ipsa_idleaddlt != 0) {
4532 			assoc->ipsa_idleexpiretime =
4533 			    current + idle->sadb_lifetime_addtime;
4534 			assoc->ipsa_idletime = idle->sadb_lifetime_addtime;
4535 		}
4536 		if (assoc->ipsa_idleuselt != 0) {
4537 			if (assoc->ipsa_idletime != 0) {
4538 				assoc->ipsa_idletime = min(assoc->ipsa_idletime,
4539 				    assoc->ipsa_idleuselt);
4540 			assoc->ipsa_idleexpiretime =
4541 			    current + assoc->ipsa_idletime;
4542 			} else {
4543 				assoc->ipsa_idleexpiretime =
4544 				    current + assoc->ipsa_idleuselt;
4545 				assoc->ipsa_idletime = assoc->ipsa_idleuselt;
4546 			}
4547 		}
4548 	}
4549 	mutex_exit(&assoc->ipsa_lock);
4550 }
4551 
4552 static int
4553 sadb_update_state(ipsa_t *assoc, uint_t new_state, mblk_t **ipkt_lst)
4554 {
4555 	int rcode = 0;
4556 	time_t current = gethrestime_sec();
4557 
4558 	mutex_enter(&assoc->ipsa_lock);
4559 
4560 	switch (new_state) {
4561 	case SADB_X_SASTATE_ACTIVE_ELSEWHERE:
4562 		if (assoc->ipsa_state == SADB_X_SASTATE_IDLE) {
4563 			assoc->ipsa_state = IPSA_STATE_ACTIVE_ELSEWHERE;
4564 			assoc->ipsa_idleexpiretime =
4565 			    current + assoc->ipsa_idletime;
4566 		}
4567 		break;
4568 	case SADB_X_SASTATE_IDLE:
4569 		if (assoc->ipsa_state == SADB_X_SASTATE_ACTIVE_ELSEWHERE) {
4570 			assoc->ipsa_state = IPSA_STATE_IDLE;
4571 			assoc->ipsa_idleexpiretime =
4572 			    current + assoc->ipsa_idletime;
4573 		} else {
4574 			rcode = EINVAL;
4575 		}
4576 		break;
4577 
4578 	case SADB_X_SASTATE_ACTIVE:
4579 		if (assoc->ipsa_state != SADB_X_SASTATE_IDLE) {
4580 			rcode = EINVAL;
4581 			break;
4582 		}
4583 		assoc->ipsa_state = IPSA_STATE_MATURE;
4584 		assoc->ipsa_idleexpiretime = current + assoc->ipsa_idletime;
4585 
4586 		if (ipkt_lst == NULL) {
4587 			break;
4588 		}
4589 
4590 		if (assoc->ipsa_bpkt_head != NULL) {
4591 			*ipkt_lst = assoc->ipsa_bpkt_head;
4592 			assoc->ipsa_bpkt_head = assoc->ipsa_bpkt_tail = NULL;
4593 			assoc->ipsa_mblkcnt = 0;
4594 		} else {
4595 			*ipkt_lst = NULL;
4596 		}
4597 		break;
4598 	default:
4599 		rcode = EINVAL;
4600 		break;
4601 	}
4602 
4603 	mutex_exit(&assoc->ipsa_lock);
4604 	return (rcode);
4605 }
4606 
4607 /*
4608  * Common code to update an SA.
4609  */
4610 
4611 int
4612 sadb_update_sa(mblk_t *mp, keysock_in_t *ksi, mblk_t **ipkt_lst,
4613     sadbp_t *spp, int *diagnostic, queue_t *pfkey_q,
4614     int (*add_sa_func)(mblk_t *, keysock_in_t *, int *, netstack_t *),
4615     netstack_t *ns, uint8_t sadb_msg_type)
4616 {
4617 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
4618 	sadb_address_t *srcext =
4619 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
4620 	sadb_address_t *dstext =
4621 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
4622 	sadb_x_kmc_t *kmcext =
4623 	    (sadb_x_kmc_t *)ksi->ks_in_extv[SADB_X_EXT_KM_COOKIE];
4624 	sadb_key_t *akey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_AUTH];
4625 	sadb_key_t *ekey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT];
4626 	sadb_x_replay_ctr_t *replext =
4627 	    (sadb_x_replay_ctr_t *)ksi->ks_in_extv[SADB_X_EXT_REPLAY_VALUE];
4628 	sadb_lifetime_t *soft =
4629 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_SOFT];
4630 	sadb_lifetime_t *hard =
4631 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_HARD];
4632 	sadb_lifetime_t *idle =
4633 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_X_EXT_LIFETIME_IDLE];
4634 	sadb_x_pair_t *pair_ext =
4635 	    (sadb_x_pair_t *)ksi->ks_in_extv[SADB_X_EXT_PAIR];
4636 	ipsa_t *echo_target = NULL;
4637 	int error = 0;
4638 	ipsap_t *ipsapp = NULL;
4639 	uint32_t kmp = 0, kmc = 0;
4640 	time_t current = gethrestime_sec();
4641 
4642 
4643 	/* I need certain extensions present for either UPDATE message. */
4644 	if (srcext == NULL) {
4645 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
4646 		return (EINVAL);
4647 	}
4648 	if (dstext == NULL) {
4649 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
4650 		return (EINVAL);
4651 	}
4652 	if (assoc == NULL) {
4653 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
4654 		return (EINVAL);
4655 	}
4656 
4657 	if (kmcext != NULL) {
4658 		kmp = kmcext->sadb_x_kmc_proto;
4659 		kmc = kmcext->sadb_x_kmc_cookie;
4660 	}
4661 
4662 	ipsapp = get_ipsa_pair(assoc, srcext, dstext, spp);
4663 	if (ipsapp == NULL) {
4664 		*diagnostic = SADB_X_DIAGNOSTIC_SA_NOTFOUND;
4665 		return (ESRCH);
4666 	}
4667 
4668 	if (ipsapp->ipsap_psa_ptr == NULL && ipsapp->ipsap_sa_ptr != NULL) {
4669 		if (ipsapp->ipsap_sa_ptr->ipsa_state == IPSA_STATE_LARVAL) {
4670 			/*
4671 			 * REFRELE the target and let the add_sa_func()
4672 			 * deal with updating a larval SA.
4673 			 */
4674 			destroy_ipsa_pair(ipsapp);
4675 			return (add_sa_func(mp, ksi, diagnostic, ns));
4676 		}
4677 	}
4678 
4679 	/*
4680 	 * At this point we have an UPDATE to a MATURE SA. There should
4681 	 * not be any keying material present.
4682 	 */
4683 	if (akey != NULL) {
4684 		*diagnostic = SADB_X_DIAGNOSTIC_AKEY_PRESENT;
4685 		error = EINVAL;
4686 		goto bail;
4687 	}
4688 	if (ekey != NULL) {
4689 		*diagnostic = SADB_X_DIAGNOSTIC_EKEY_PRESENT;
4690 		error = EINVAL;
4691 		goto bail;
4692 	}
4693 
4694 	if (assoc->sadb_sa_state == SADB_X_SASTATE_ACTIVE_ELSEWHERE) {
4695 		if (ipsapp->ipsap_sa_ptr != NULL &&
4696 		    ipsapp->ipsap_sa_ptr->ipsa_state == IPSA_STATE_IDLE) {
4697 			if ((error = sadb_update_state(ipsapp->ipsap_sa_ptr,
4698 			    assoc->sadb_sa_state, NULL)) != 0) {
4699 				*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4700 				goto bail;
4701 			}
4702 		}
4703 		if (ipsapp->ipsap_psa_ptr != NULL &&
4704 		    ipsapp->ipsap_psa_ptr->ipsa_state == IPSA_STATE_IDLE) {
4705 			if ((error = sadb_update_state(ipsapp->ipsap_psa_ptr,
4706 			    assoc->sadb_sa_state, NULL)) != 0) {
4707 				*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4708 				goto bail;
4709 			}
4710 		}
4711 	}
4712 	if (assoc->sadb_sa_state == SADB_X_SASTATE_ACTIVE) {
4713 		if (ipsapp->ipsap_sa_ptr != NULL) {
4714 			error = sadb_update_state(ipsapp->ipsap_sa_ptr,
4715 			    assoc->sadb_sa_state,
4716 			    (ipsapp->ipsap_sa_ptr->ipsa_flags &
4717 			    IPSA_F_INBOUND) ? ipkt_lst : NULL);
4718 			if (error) {
4719 				*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4720 				goto bail;
4721 			}
4722 		}
4723 		if (ipsapp->ipsap_psa_ptr != NULL) {
4724 			error = sadb_update_state(ipsapp->ipsap_psa_ptr,
4725 			    assoc->sadb_sa_state,
4726 			    (ipsapp->ipsap_psa_ptr->ipsa_flags &
4727 			    IPSA_F_INBOUND) ? ipkt_lst : NULL);
4728 			if (error) {
4729 				*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4730 				goto bail;
4731 			}
4732 		}
4733 		sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)mp->b_cont->b_rptr,
4734 		    ksi, echo_target);
4735 		goto bail;
4736 	}
4737 
4738 	/*
4739 	 * Reality checks for updates of active associations.
4740 	 * Sundry first-pass UPDATE-specific reality checks.
4741 	 * Have to do the checks here, because it's after the add_sa code.
4742 	 * XXX STATS : logging/stats here?
4743 	 */
4744 
4745 	if (!((assoc->sadb_sa_state == SADB_SASTATE_MATURE) ||
4746 	    (assoc->sadb_sa_state == SADB_X_SASTATE_ACTIVE_ELSEWHERE))) {
4747 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4748 		error = EINVAL;
4749 		goto bail;
4750 	}
4751 
4752 	if (assoc->sadb_sa_flags & ~spp->s_updateflags) {
4753 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_SAFLAGS;
4754 		error = EINVAL;
4755 		goto bail;
4756 	}
4757 
4758 	if (ksi->ks_in_extv[SADB_EXT_LIFETIME_CURRENT] != NULL) {
4759 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_LIFETIME;
4760 		error = EOPNOTSUPP;
4761 		goto bail;
4762 	}
4763 
4764 	if ((*diagnostic = sadb_hardsoftchk(hard, soft, idle)) != 0) {
4765 		error = EINVAL;
4766 		goto bail;
4767 	}
4768 
4769 	if (ipsapp->ipsap_sa_ptr != NULL) {
4770 		if (ipsapp->ipsap_sa_ptr->ipsa_state == IPSA_STATE_DEAD) {
4771 			error = ESRCH;	/* DEAD == Not there, in this case. */
4772 			*diagnostic = SADB_X_DIAGNOSTIC_SA_EXPIRED;
4773 			goto bail;
4774 		}
4775 		if ((kmp != 0) &&
4776 		    ((ipsapp->ipsap_sa_ptr->ipsa_kmp != 0) ||
4777 		    (ipsapp->ipsap_sa_ptr->ipsa_kmp != kmp))) {
4778 			*diagnostic = SADB_X_DIAGNOSTIC_DUPLICATE_KMP;
4779 			error = EINVAL;
4780 			goto bail;
4781 		}
4782 		if ((kmc != 0) &&
4783 		    ((ipsapp->ipsap_sa_ptr->ipsa_kmc != 0) ||
4784 		    (ipsapp->ipsap_sa_ptr->ipsa_kmc != kmc))) {
4785 			*diagnostic = SADB_X_DIAGNOSTIC_DUPLICATE_KMC;
4786 			error = EINVAL;
4787 			goto bail;
4788 		}
4789 		/*
4790 		 * Do not allow replay value change for MATURE or LARVAL SA.
4791 		 */
4792 
4793 		if ((replext != NULL) &&
4794 		    ((ipsapp->ipsap_sa_ptr->ipsa_state == IPSA_STATE_LARVAL) ||
4795 		    (ipsapp->ipsap_sa_ptr->ipsa_state == IPSA_STATE_MATURE))) {
4796 			*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4797 			error = EINVAL;
4798 			goto bail;
4799 		}
4800 	}
4801 
4802 	if (ipsapp->ipsap_psa_ptr != NULL) {
4803 		if (ipsapp->ipsap_psa_ptr->ipsa_state == IPSA_STATE_DEAD) {
4804 			*diagnostic = SADB_X_DIAGNOSTIC_SA_EXPIRED;
4805 			error = ESRCH;	/* DEAD == Not there, in this case. */
4806 			goto bail;
4807 		}
4808 		if ((kmp != 0) &&
4809 		    ((ipsapp->ipsap_psa_ptr->ipsa_kmp != 0) ||
4810 		    (ipsapp->ipsap_psa_ptr->ipsa_kmp != kmp))) {
4811 			*diagnostic = SADB_X_DIAGNOSTIC_DUPLICATE_KMP;
4812 			error = EINVAL;
4813 			goto bail;
4814 		}
4815 		if ((kmc != 0) &&
4816 		    ((ipsapp->ipsap_psa_ptr->ipsa_kmc != 0) ||
4817 		    (ipsapp->ipsap_psa_ptr->ipsa_kmc != kmc))) {
4818 			*diagnostic = SADB_X_DIAGNOSTIC_DUPLICATE_KMC;
4819 			error = EINVAL;
4820 			goto bail;
4821 		}
4822 	}
4823 
4824 	if (ipsapp->ipsap_sa_ptr != NULL) {
4825 		sadb_update_lifetimes(ipsapp->ipsap_sa_ptr, hard, soft,
4826 		    idle, B_TRUE);
4827 		if (kmp != 0)
4828 			ipsapp->ipsap_sa_ptr->ipsa_kmp = kmp;
4829 		if (kmc != 0)
4830 			ipsapp->ipsap_sa_ptr->ipsa_kmc = kmc;
4831 		if ((replext != NULL) &&
4832 		    (ipsapp->ipsap_sa_ptr->ipsa_replay_wsize != 0)) {
4833 			/*
4834 			 * If an inbound SA, update the replay counter
4835 			 * and check off all the other sequence number
4836 			 */
4837 			if (ksi->ks_in_dsttype == KS_IN_ADDR_ME) {
4838 				if (!sadb_replay_check(ipsapp->ipsap_sa_ptr,
4839 				    replext->sadb_x_rc_replay32)) {
4840 					*diagnostic =
4841 					    SADB_X_DIAGNOSTIC_INVALID_REPLAY;
4842 					error = EINVAL;
4843 					goto bail;
4844 				}
4845 				mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4846 				ipsapp->ipsap_sa_ptr->ipsa_idleexpiretime =
4847 				    current +
4848 				    ipsapp->ipsap_sa_ptr->ipsa_idletime;
4849 				mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4850 			} else {
4851 				mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4852 				ipsapp->ipsap_sa_ptr->ipsa_replay =
4853 				    replext->sadb_x_rc_replay32;
4854 				ipsapp->ipsap_sa_ptr->ipsa_idleexpiretime =
4855 				    current +
4856 				    ipsapp->ipsap_sa_ptr->ipsa_idletime;
4857 				mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4858 			}
4859 		}
4860 	}
4861 
4862 	if (sadb_msg_type == SADB_X_UPDATEPAIR) {
4863 		if (ipsapp->ipsap_psa_ptr != NULL) {
4864 			sadb_update_lifetimes(ipsapp->ipsap_psa_ptr, hard, soft,
4865 			    idle, B_FALSE);
4866 			if (kmp != 0)
4867 				ipsapp->ipsap_psa_ptr->ipsa_kmp = kmp;
4868 			if (kmc != 0)
4869 				ipsapp->ipsap_psa_ptr->ipsa_kmc = kmc;
4870 		} else {
4871 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_SA_NOTFOUND;
4872 			error = ESRCH;
4873 			goto bail;
4874 		}
4875 	}
4876 
4877 	if (pair_ext != NULL)
4878 		error = update_pairing(ipsapp, ksi, diagnostic, spp);
4879 
4880 	if (error == 0)
4881 		sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)mp->b_cont->b_rptr,
4882 		    ksi, echo_target);
4883 bail:
4884 
4885 	destroy_ipsa_pair(ipsapp);
4886 
4887 	return (error);
4888 }
4889 
4890 
4891 int
4892 update_pairing(ipsap_t *ipsapp, keysock_in_t *ksi, int *diagnostic,
4893     sadbp_t *spp)
4894 {
4895 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
4896 	sadb_address_t *srcext =
4897 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
4898 	sadb_address_t *dstext =
4899 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
4900 	sadb_x_pair_t *pair_ext =
4901 	    (sadb_x_pair_t *)ksi->ks_in_extv[SADB_X_EXT_PAIR];
4902 	int error = 0;
4903 	ipsap_t *oipsapp = NULL;
4904 	boolean_t undo_pair = B_FALSE;
4905 	uint32_t ipsa_flags;
4906 
4907 	if (pair_ext->sadb_x_pair_spi == 0 || pair_ext->sadb_x_pair_spi ==
4908 	    assoc->sadb_sa_spi) {
4909 		*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
4910 		return (EINVAL);
4911 	}
4912 
4913 	/*
4914 	 * Assume for now that the spi value provided in the SADB_UPDATE
4915 	 * message was valid, update the SA with its pair spi value.
4916 	 * If the spi turns out to be bogus or the SA no longer exists
4917 	 * then this will be detected when the reverse update is made
4918 	 * below.
4919 	 */
4920 	mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4921 	ipsapp->ipsap_sa_ptr->ipsa_flags |= IPSA_F_PAIRED;
4922 	ipsapp->ipsap_sa_ptr->ipsa_otherspi = pair_ext->sadb_x_pair_spi;
4923 	mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4924 
4925 	/*
4926 	 * After updating the ipsa_otherspi element of the SA, get_ipsa_pair()
4927 	 * should now return pointers to the SA *AND* its pair, if this is not
4928 	 * the case, the "otherspi" either did not exist or was deleted. Also
4929 	 * check that "otherspi" is not already paired. If everything looks
4930 	 * good, complete the update. IPSA_REFRELE the first pair_pointer
4931 	 * after this update to ensure its not deleted until we are done.
4932 	 */
4933 	oipsapp = get_ipsa_pair(assoc, srcext, dstext, spp);
4934 	if (oipsapp == NULL) {
4935 		/*
4936 		 * This should never happen, calling function still has
4937 		 * IPSA_REFHELD on the SA we just updated.
4938 		 */
4939 		*diagnostic = SADB_X_DIAGNOSTIC_PAIR_SA_NOTFOUND;
4940 		return (EINVAL);
4941 	}
4942 
4943 	if (oipsapp->ipsap_psa_ptr == NULL) {
4944 		*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
4945 		error = EINVAL;
4946 		undo_pair = B_TRUE;
4947 	} else {
4948 		ipsa_flags = oipsapp->ipsap_psa_ptr->ipsa_flags;
4949 		if ((oipsapp->ipsap_psa_ptr->ipsa_state == IPSA_STATE_DEAD) ||
4950 		    (oipsapp->ipsap_psa_ptr->ipsa_state == IPSA_STATE_DYING)) {
4951 			/* Its dead Jim! */
4952 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
4953 			undo_pair = B_TRUE;
4954 		} else if ((ipsa_flags & (IPSA_F_OUTBOUND | IPSA_F_INBOUND)) ==
4955 		    (IPSA_F_OUTBOUND | IPSA_F_INBOUND)) {
4956 			/* This SA is in both hashtables. */
4957 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
4958 			undo_pair = B_TRUE;
4959 		} else if (ipsa_flags & IPSA_F_PAIRED) {
4960 			/* This SA is already paired with another. */
4961 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_ALREADY;
4962 			undo_pair = B_TRUE;
4963 		}
4964 	}
4965 
4966 	if (undo_pair) {
4967 		/* The pair SA does not exist. */
4968 		mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4969 		ipsapp->ipsap_sa_ptr->ipsa_flags &= ~IPSA_F_PAIRED;
4970 		ipsapp->ipsap_sa_ptr->ipsa_otherspi = 0;
4971 		mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4972 	} else {
4973 		mutex_enter(&oipsapp->ipsap_psa_ptr->ipsa_lock);
4974 		oipsapp->ipsap_psa_ptr->ipsa_otherspi = assoc->sadb_sa_spi;
4975 		oipsapp->ipsap_psa_ptr->ipsa_flags |= IPSA_F_PAIRED;
4976 		mutex_exit(&oipsapp->ipsap_psa_ptr->ipsa_lock);
4977 	}
4978 
4979 	destroy_ipsa_pair(oipsapp);
4980 	return (error);
4981 }
4982 
4983 /*
4984  * The following functions deal with ACQUIRE LISTS.  An ACQUIRE list is
4985  * a list of outstanding SADB_ACQUIRE messages.	 If ipsec_getassocbyconn() fails
4986  * for an outbound datagram, that datagram is queued up on an ACQUIRE record,
4987  * and an SADB_ACQUIRE message is sent up.  Presumably, a user-space key
4988  * management daemon will process the ACQUIRE, use a SADB_GETSPI to reserve
4989  * an SPI value and a larval SA, then SADB_UPDATE the larval SA, and ADD the
4990  * other direction's SA.
4991  */
4992 
4993 /*
4994  * Check the ACQUIRE lists.  If there's an existing ACQUIRE record,
4995  * grab it, lock it, and return it.  Otherwise return NULL.
4996  */
4997 static ipsacq_t *
4998 sadb_checkacquire(iacqf_t *bucket, ipsec_action_t *ap, ipsec_policy_t *pp,
4999     uint32_t *src, uint32_t *dst, uint32_t *isrc, uint32_t *idst,
5000     uint64_t unique_id)
5001 {
5002 	ipsacq_t *walker;
5003 	sa_family_t fam;
5004 	uint32_t blank_address[4] = {0, 0, 0, 0};
5005 
5006 	if (isrc == NULL) {
5007 		ASSERT(idst == NULL);
5008 		isrc = idst = blank_address;
5009 	}
5010 
5011 	/*
5012 	 * Scan list for duplicates.  Check for UNIQUE, src/dest, policy.
5013 	 *
5014 	 * XXX May need search for duplicates based on other things too!
5015 	 */
5016 	for (walker = bucket->iacqf_ipsacq; walker != NULL;
5017 	    walker = walker->ipsacq_next) {
5018 		mutex_enter(&walker->ipsacq_lock);
5019 		fam = walker->ipsacq_addrfam;
5020 		if (IPSA_ARE_ADDR_EQUAL(dst, walker->ipsacq_dstaddr, fam) &&
5021 		    IPSA_ARE_ADDR_EQUAL(src, walker->ipsacq_srcaddr, fam) &&
5022 		    ip_addr_match((uint8_t *)isrc, walker->ipsacq_innersrcpfx,
5023 		    (in6_addr_t *)walker->ipsacq_innersrc) &&
5024 		    ip_addr_match((uint8_t *)idst, walker->ipsacq_innerdstpfx,
5025 		    (in6_addr_t *)walker->ipsacq_innerdst) &&
5026 		    (ap == walker->ipsacq_act) &&
5027 		    (pp == walker->ipsacq_policy) &&
5028 		    /* XXX do deep compares of ap/pp? */
5029 		    (unique_id == walker->ipsacq_unique_id))
5030 			break;			/* everything matched */
5031 		mutex_exit(&walker->ipsacq_lock);
5032 	}
5033 
5034 	return (walker);
5035 }
5036 
/*
 * Queue an outbound packet on an ACQUIRE record and, if this packet starts
 * a fresh record, send an ACQUIRE up through the SADB's s_acqfn callback.
 *
 * mp		- message whose b_cont holds the IP datagram; it is linked
 *		  onto the record's packet list through b_next.
 * io		- ipsec_out_t supplying the policy, action, selectors and
 *		  netstack for this packet.
 * need_ah,
 * need_esp	- which protocol(s) need an SA.  When need_esp is set the
 *		  record goes on ESP's ACQUIRE list, otherwise on AH's.
 *
 * Assume bucket contains addrs of all of the same length.  The packet is
 * dropped (via ip_drop_packet()) if this is tunnel mode with no policy
 * pointer, or if memory cannot be allocated for a new record.  If the
 * record's queue is already at ipsacq_maxpackets, the oldest queued packet
 * is dropped instead.
 *
 * In cases where we need both AH and ESP, add the SA to the ESP ACQUIRE
 * list.  The ah_add_sa_finish() routines can look at the packet's ipsec_out_t
 * and handle this case specially.
 */
void
sadb_acquire(mblk_t *mp, ipsec_out_t *io, boolean_t need_ah, boolean_t need_esp)
{
	sadbp_t *spp;
	sadb_t *sp;
	ipsacq_t *newbie;
	iacqf_t *bucket;
	mblk_t *datamp = mp->b_cont;
	mblk_t *extended;
	/* Both header views alias the same bytes; the version picks one. */
	ipha_t *ipha = (ipha_t *)datamp->b_rptr;
	ip6_t *ip6h = (ip6_t *)datamp->b_rptr;
	uint32_t *src, *dst, *isrc, *idst;
	ipsec_policy_t *pp = io->ipsec_out_policy;
	ipsec_action_t *ap = io->ipsec_out_act;
	sa_family_t af;
	int hashoffset;
	uint32_t seq;
	uint64_t unique_id = 0;
	ipsec_selector_t sel;
	boolean_t tunnel_mode = io->ipsec_out_tunnel;
	netstack_t	*ns = io->ipsec_out_ns;
	ipsec_stack_t	*ipss = ns->netstack_ipsec;

	ASSERT((pp != NULL) || (ap != NULL));

	/*
	 * NOTE(review): this compares boolean_t values against NULL; the
	 * effect is to assert that at least one of the two is nonzero.
	 */
	ASSERT(need_ah != NULL || need_esp != NULL);
	/* Assign sadb pointers */
	if (need_esp) { /* ESP for AH+ESP */
		ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;

		spp = &espstack->esp_sadb;
	} else {
		ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;

		spp = &ahstack->ah_sadb;
	}
	sp = io->ipsec_out_v4 ? &spp->s_v4 : &spp->s_v6;

	/* No explicit action: fall back to the policy's action. */
	if (ap == NULL)
		ap = pp->ipsp_act;

	ASSERT(ap != NULL);

	/* unique_id stays 0 unless the action wants unique SAs or we tunnel. */
	if (ap->ipa_act.ipa_apply.ipp_use_unique || tunnel_mode)
		unique_id = SA_FORM_UNIQUE_ID(io);

	/*
	 * Set up an ACQUIRE record.
	 *
	 * Immediately, make sure the ACQUIRE sequence number doesn't slip
	 * below the lowest point allowed in the kernel.  (In other words,
	 * make sure the high bit on the sequence number is set.)
	 */

	seq = keysock_next_seq(ns) | IACQF_LOWEST_SEQ;

	/* Pick outer addresses, family and hash bucket from the IP header. */
	if (IPH_HDR_VERSION(ipha) == IP_VERSION) {
		src = (uint32_t *)&ipha->ipha_src;
		dst = (uint32_t *)&ipha->ipha_dst;
		af = AF_INET;
		hashoffset = OUTBOUND_HASH_V4(sp, ipha->ipha_dst);
		ASSERT(io->ipsec_out_v4 == B_TRUE);
	} else {
		ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION);
		src = (uint32_t *)&ip6h->ip6_src;
		dst = (uint32_t *)&ip6h->ip6_dst;
		af = AF_INET6;
		hashoffset = OUTBOUND_HASH_V6(sp, ip6h->ip6_dst);
		ASSERT(io->ipsec_out_v4 == B_FALSE);
	}

	if (tunnel_mode) {
		if (pp == NULL) {
			/*
			 * Tunnel mode with no policy pointer means this is a
			 * reflected ICMP (like a ECHO REQUEST) that came in
			 * with self-encapsulated protection.  Until we better
			 * support this, drop the packet.
			 */
			ip_drop_packet(mp, B_FALSE, NULL, NULL,
			    DROPPER(ipss, ipds_spd_got_selfencap),
			    &ipss->ipsec_spd_dropper);
			return;
		}
		/* Snag inner addresses. */
		isrc = io->ipsec_out_insrc;
		idst = io->ipsec_out_indst;
	} else {
		isrc = idst = NULL;
	}

	/*
	 * Check buckets to see if there is an existing entry.  If so,
	 * grab it.  sadb_checkacquire locks newbie if found.
	 */
	bucket = &(sp->sdb_acq[hashoffset]);
	mutex_enter(&bucket->iacqf_lock);
	newbie = sadb_checkacquire(bucket, ap, pp, src, dst, isrc, idst,
	    unique_id);

	if (newbie == NULL) {
		/*
		 * Otherwise, allocate a new one.
		 */
		newbie = kmem_zalloc(sizeof (*newbie), KM_NOSLEEP);
		if (newbie == NULL) {
			mutex_exit(&bucket->iacqf_lock);
			ip_drop_packet(mp, B_FALSE, NULL, NULL,
			    DROPPER(ipss, ipds_sadb_acquire_nomem),
			    &ipss->ipsec_sadb_dropper);
			return;
		}
		/* The record holds its own references on policy and action. */
		newbie->ipsacq_policy = pp;
		if (pp != NULL) {
			IPPOL_REFHOLD(pp);
		}
		IPACT_REFHOLD(ap);
		newbie->ipsacq_act = ap;
		/* Link the new record in at the head of the bucket's list. */
		newbie->ipsacq_linklock = &bucket->iacqf_lock;
		newbie->ipsacq_next = bucket->iacqf_ipsacq;
		newbie->ipsacq_ptpn = &bucket->iacqf_ipsacq;
		if (newbie->ipsacq_next != NULL)
			newbie->ipsacq_next->ipsacq_ptpn = &newbie->ipsacq_next;
		bucket->iacqf_ipsacq = newbie;
		/* Lock it so both paths leave here with newbie locked. */
		mutex_init(&newbie->ipsacq_lock, NULL, MUTEX_DEFAULT, NULL);
		mutex_enter(&newbie->ipsacq_lock);
	}

	mutex_exit(&bucket->iacqf_lock);

	/*
	 * This assert looks silly for now, but we may need to enter newbie's
	 * mutex during a search.
	 */
	ASSERT(MUTEX_HELD(&newbie->ipsacq_lock));

	mp->b_next = NULL;
	/* Queue up packet.  Use b_next. */
	if (newbie->ipsacq_numpackets == 0) {
		/* First one. */
		newbie->ipsacq_mp = mp;
		newbie->ipsacq_numpackets = 1;
		newbie->ipsacq_expire = gethrestime_sec();
		/*
		 * Extended ACQUIRE with both AH+ESP will use ESP's timeout
		 * value.
		 */
		newbie->ipsacq_expire += *spp->s_acquire_timeout;
		newbie->ipsacq_seq = seq;
		newbie->ipsacq_addrfam = af;

		newbie->ipsacq_srcport = io->ipsec_out_src_port;
		newbie->ipsacq_dstport = io->ipsec_out_dst_port;
		newbie->ipsacq_icmp_type = io->ipsec_out_icmp_type;
		newbie->ipsacq_icmp_code = io->ipsec_out_icmp_code;
		if (tunnel_mode) {
			/* Record the inner selectors and IP-in-IP protocol. */
			newbie->ipsacq_inneraddrfam = io->ipsec_out_inaf;
			newbie->ipsacq_proto = io->ipsec_out_inaf == AF_INET6 ?
			    IPPROTO_IPV6 : IPPROTO_ENCAP;
			newbie->ipsacq_innersrcpfx = io->ipsec_out_insrcpfx;
			newbie->ipsacq_innerdstpfx = io->ipsec_out_indstpfx;
			IPSA_COPY_ADDR(newbie->ipsacq_innersrc,
			    io->ipsec_out_insrc, io->ipsec_out_inaf);
			IPSA_COPY_ADDR(newbie->ipsacq_innerdst,
			    io->ipsec_out_indst, io->ipsec_out_inaf);
		} else {
			newbie->ipsacq_proto = io->ipsec_out_proto;
		}
		newbie->ipsacq_unique_id = unique_id;
	} else {
		/* Scan to the end of the list & insert. */
		mblk_t *lastone = newbie->ipsacq_mp;

		while (lastone->b_next != NULL)
			lastone = lastone->b_next;
		lastone->b_next = mp;
		if (newbie->ipsacq_numpackets++ == ipsacq_maxpackets) {
			/*
			 * The queue was already full: keep the count at the
			 * maximum and drop the packet at the head (the one
			 * queued longest) to make room for the new one.
			 */
			newbie->ipsacq_numpackets = ipsacq_maxpackets;
			lastone = newbie->ipsacq_mp;
			newbie->ipsacq_mp = lastone->b_next;
			lastone->b_next = NULL;
			ip_drop_packet(lastone, B_FALSE, NULL, NULL,
			    DROPPER(ipss, ipds_sadb_acquire_toofull),
			    &ipss->ipsec_sadb_dropper);
		} else {
			IP_ACQUIRE_STAT(ipss, qhiwater,
			    newbie->ipsacq_numpackets);
		}
	}

	/*
	 * Reset addresses.  Set them to the most recently added mblk chain,
	 * so that the address pointers in the acquire record will point
	 * at an mblk still attached to the acquire list.
	 */

	newbie->ipsacq_srcaddr = src;
	newbie->ipsacq_dstaddr = dst;

	/*
	 * If the acquire record has more than one queued packet, we've
	 * already sent an ACQUIRE, and don't need to repeat ourself.
	 * The same holds when the record carries a sequence number other
	 * than the one generated for this call.
	 */
	if (newbie->ipsacq_seq != seq || newbie->ipsacq_numpackets > 1) {
		/* I have an acquire outstanding already! */
		mutex_exit(&newbie->ipsacq_lock);
		return;
	}

	if (keysock_extended_reg(ns)) {
		/*
		 * Construct an extended ACQUIRE.  There are logging
		 * opportunities here in failure cases.
		 */

		(void) memset(&sel, 0, sizeof (sel));
		sel.ips_isv4 = io->ipsec_out_v4;
		if (tunnel_mode) {
			sel.ips_protocol = (io->ipsec_out_inaf == AF_INET) ?
			    IPPROTO_ENCAP : IPPROTO_IPV6;
		} else {
			sel.ips_protocol = io->ipsec_out_proto;
			sel.ips_local_port = io->ipsec_out_src_port;
			sel.ips_remote_port = io->ipsec_out_dst_port;
		}
		sel.ips_icmp_type = io->ipsec_out_icmp_type;
		sel.ips_icmp_code = io->ipsec_out_icmp_code;
		sel.ips_is_icmp_inv_acq = 0;
		if (af == AF_INET) {
			sel.ips_local_addr_v4 = ipha->ipha_src;
			sel.ips_remote_addr_v4 = ipha->ipha_dst;
		} else {
			sel.ips_local_addr_v6 = ip6h->ip6_src;
			sel.ips_remote_addr_v6 = ip6h->ip6_dst;
		}

		/*
		 * Failure to build the extended ACQUIRE is not fatal; the
		 * callback is simply handed a NULL "extended" message.
		 */
		extended = sadb_keysock_out(0);
		if (extended != NULL) {
			extended->b_cont = sadb_extended_acquire(&sel, pp, ap,
			    tunnel_mode, seq, 0, ns);
			if (extended->b_cont == NULL) {
				freeb(extended);
				extended = NULL;
			}
		}
	} else
		extended = NULL;

	/*
	 * Send an ACQUIRE message (and possible an extended ACQUIRE) based on
	 * this new record.  The send-acquire callback assumes that acqrec is
	 * already locked.
	 *
	 * NOTE(review): newbie's lock is not dropped here, so presumably the
	 * callback releases it -- confirm against the s_acqfn implementations.
	 */
	(*spp->s_acqfn)(newbie, extended, ns);
}
5302 
5303 /*
5304  * Unlink and free an acquire record.
5305  */
5306 void
5307 sadb_destroy_acquire(ipsacq_t *acqrec, netstack_t *ns)
5308 {
5309 	mblk_t *mp;
5310 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
5311 
5312 	ASSERT(MUTEX_HELD(acqrec->ipsacq_linklock));
5313 
5314 	if (acqrec->ipsacq_policy != NULL) {
5315 		IPPOL_REFRELE(acqrec->ipsacq_policy, ns);
5316 	}
5317 	if (acqrec->ipsacq_act != NULL) {
5318 		IPACT_REFRELE(acqrec->ipsacq_act);
5319 	}
5320 
5321 	/* Unlink */
5322 	*(acqrec->ipsacq_ptpn) = acqrec->ipsacq_next;
5323 	if (acqrec->ipsacq_next != NULL)
5324 		acqrec->ipsacq_next->ipsacq_ptpn = acqrec->ipsacq_ptpn;
5325 
5326 	/*
5327 	 * Free hanging mp's.
5328 	 *
5329 	 * XXX Instead of freemsg(), perhaps use IPSEC_REQ_FAILED.
5330 	 */
5331 
5332 	mutex_enter(&acqrec->ipsacq_lock);
5333 	while (acqrec->ipsacq_mp != NULL) {
5334 		mp = acqrec->ipsacq_mp;
5335 		acqrec->ipsacq_mp = mp->b_next;
5336 		mp->b_next = NULL;
5337 		ip_drop_packet(mp, B_FALSE, NULL, NULL,
5338 		    DROPPER(ipss, ipds_sadb_acquire_timeout),
5339 		    &ipss->ipsec_sadb_dropper);
5340 	}
5341 	mutex_exit(&acqrec->ipsacq_lock);
5342 
5343 	/* Free */
5344 	mutex_destroy(&acqrec->ipsacq_lock);
5345 	kmem_free(acqrec, sizeof (*acqrec));
5346 }
5347 
5348 /*
5349  * Destroy an acquire list fanout.
5350  */
5351 static void
5352 sadb_destroy_acqlist(iacqf_t **listp, uint_t numentries, boolean_t forever,
5353     netstack_t *ns)
5354 {
5355 	int i;
5356 	iacqf_t *list = *listp;
5357 
5358 	if (list == NULL)
5359 		return;
5360 
5361 	for (i = 0; i < numentries; i++) {
5362 		mutex_enter(&(list[i].iacqf_lock));
5363 		while (list[i].iacqf_ipsacq != NULL)
5364 			sadb_destroy_acquire(list[i].iacqf_ipsacq, ns);
5365 		mutex_exit(&(list[i].iacqf_lock));
5366 		if (forever)
5367 			mutex_destroy(&(list[i].iacqf_lock));
5368 	}
5369 
5370 	if (forever) {
5371 		*listp = NULL;
5372 		kmem_free(list, numentries * sizeof (*list));
5373 	}
5374 }
5375 
5376 /*
5377  * Create an algorithm descriptor for an extended ACQUIRE.  Filter crypto
5378  * framework's view of reality vs. IPsec's.  EF's wins, BTW.
5379  */
5380 static uint8_t *
5381 sadb_new_algdesc(uint8_t *start, uint8_t *limit,
5382     sadb_x_ecomb_t *ecomb, uint8_t satype, uint8_t algtype,
5383     uint8_t alg, uint16_t minbits, uint16_t maxbits, ipsec_stack_t *ipss)
5384 {
5385 	uint8_t *cur = start;
5386 	ipsec_alginfo_t *algp;
5387 	sadb_x_algdesc_t *algdesc = (sadb_x_algdesc_t *)cur;
5388 
5389 	cur += sizeof (*algdesc);
5390 	if (cur >= limit)
5391 		return (NULL);
5392 
5393 	ecomb->sadb_x_ecomb_numalgs++;
5394 
5395 	/*
5396 	 * Normalize vs. crypto framework's limits.  This way, you can specify
5397 	 * a stronger policy, and when the framework loads a stronger version,
5398 	 * you can just keep plowing w/o rewhacking your SPD.
5399 	 */
5400 	mutex_enter(&ipss->ipsec_alg_lock);
5401 	algp = ipss->ipsec_alglists[(algtype == SADB_X_ALGTYPE_AUTH) ?
5402 	    IPSEC_ALG_AUTH : IPSEC_ALG_ENCR][alg];
5403 	if (algp == NULL) {
5404 		mutex_exit(&ipss->ipsec_alg_lock);
5405 		return (NULL);	/* Algorithm doesn't exist.  Fail gracefully. */
5406 	}
5407 	if (minbits < algp->alg_ef_minbits)
5408 		minbits = algp->alg_ef_minbits;
5409 	if (maxbits > algp->alg_ef_maxbits)
5410 		maxbits = algp->alg_ef_maxbits;
5411 	mutex_exit(&ipss->ipsec_alg_lock);
5412 
5413 	algdesc->sadb_x_algdesc_satype = satype;
5414 	algdesc->sadb_x_algdesc_algtype = algtype;
5415 	algdesc->sadb_x_algdesc_alg = alg;
5416 	algdesc->sadb_x_algdesc_minbits = minbits;
5417 	algdesc->sadb_x_algdesc_maxbits = maxbits;
5418 	algdesc->sadb_x_algdesc_reserved = 0;
5419 	return (cur);
5420 }
5421 
5422 /*
5423  * Convert the given ipsec_action_t into an ecomb starting at *ecomb
5424  * which must fit before *limit
5425  *
5426  * return NULL if we ran out of room or a pointer to the end of the ecomb.
5427  */
5428 static uint8_t *
5429 sadb_action_to_ecomb(uint8_t *start, uint8_t *limit, ipsec_action_t *act,
5430     netstack_t *ns)
5431 {
5432 	uint8_t *cur = start;
5433 	sadb_x_ecomb_t *ecomb = (sadb_x_ecomb_t *)cur;
5434 	ipsec_prot_t *ipp;
5435 	ipsec_stack_t *ipss = ns->netstack_ipsec;
5436 
5437 	cur += sizeof (*ecomb);
5438 	if (cur >= limit)
5439 		return (NULL);
5440 
5441 	ASSERT(act->ipa_act.ipa_type == IPSEC_ACT_APPLY);
5442 
5443 	ipp = &act->ipa_act.ipa_apply;
5444 
5445 	ecomb->sadb_x_ecomb_numalgs = 0;
5446 	ecomb->sadb_x_ecomb_reserved = 0;
5447 	ecomb->sadb_x_ecomb_reserved2 = 0;
5448 	/*
5449 	 * No limits on allocations, since we really don't support that
5450 	 * concept currently.
5451 	 */
5452 	ecomb->sadb_x_ecomb_soft_allocations = 0;
5453 	ecomb->sadb_x_ecomb_hard_allocations = 0;
5454 
5455 	/*
5456 	 * XXX TBD: Policy or global parameters will eventually be
5457 	 * able to fill in some of these.
5458 	 */
5459 	ecomb->sadb_x_ecomb_flags = 0;
5460 	ecomb->sadb_x_ecomb_soft_bytes = 0;
5461 	ecomb->sadb_x_ecomb_hard_bytes = 0;
5462 	ecomb->sadb_x_ecomb_soft_addtime = 0;
5463 	ecomb->sadb_x_ecomb_hard_addtime = 0;
5464 	ecomb->sadb_x_ecomb_soft_usetime = 0;
5465 	ecomb->sadb_x_ecomb_hard_usetime = 0;
5466 
5467 	if (ipp->ipp_use_ah) {
5468 		cur = sadb_new_algdesc(cur, limit, ecomb,
5469 		    SADB_SATYPE_AH, SADB_X_ALGTYPE_AUTH, ipp->ipp_auth_alg,
5470 		    ipp->ipp_ah_minbits, ipp->ipp_ah_maxbits, ipss);
5471 		if (cur == NULL)
5472 			return (NULL);
5473 		ipsecah_fill_defs(ecomb, ns);
5474 	}
5475 
5476 	if (ipp->ipp_use_esp) {
5477 		if (ipp->ipp_use_espa) {
5478 			cur = sadb_new_algdesc(cur, limit, ecomb,
5479 			    SADB_SATYPE_ESP, SADB_X_ALGTYPE_AUTH,
5480 			    ipp->ipp_esp_auth_alg,
5481 			    ipp->ipp_espa_minbits,
5482 			    ipp->ipp_espa_maxbits, ipss);
5483 			if (cur == NULL)
5484 				return (NULL);
5485 		}
5486 
5487 		cur = sadb_new_algdesc(cur, limit, ecomb,
5488 		    SADB_SATYPE_ESP, SADB_X_ALGTYPE_CRYPT,
5489 		    ipp->ipp_encr_alg,
5490 		    ipp->ipp_espe_minbits,
5491 		    ipp->ipp_espe_maxbits, ipss);
5492 		if (cur == NULL)
5493 			return (NULL);
5494 		/* Fill in lifetimes if and only if AH didn't already... */
5495 		if (!ipp->ipp_use_ah)
5496 			ipsecesp_fill_defs(ecomb, ns);
5497 	}
5498 
5499 	return (cur);
5500 }
5501 
5502 /*
5503  * Construct an extended ACQUIRE message based on a selector and the resulting
5504  * IPsec action.
5505  *
5506  * NOTE: This is used by both inverse ACQUIRE and actual ACQUIRE
5507  * generation. As a consequence, expect this function to evolve
5508  * rapidly.
5509  */
5510 static mblk_t *
5511 sadb_extended_acquire(ipsec_selector_t *sel, ipsec_policy_t *pol,
5512     ipsec_action_t *act, boolean_t tunnel_mode, uint32_t seq, uint32_t pid,
5513     netstack_t *ns)
5514 {
5515 	mblk_t *mp;
5516 	sadb_msg_t *samsg;
5517 	uint8_t *start, *cur, *end;
5518 	uint32_t *saddrptr, *daddrptr;
5519 	sa_family_t af;
5520 	sadb_prop_t *eprop;
5521 	ipsec_action_t *ap, *an;
5522 	ipsec_selkey_t *ipsl;
5523 	uint8_t proto, pfxlen;
5524 	uint16_t lport, rport;
5525 	uint32_t kmp, kmc;
5526 
5527 	/*
5528 	 * Find the action we want sooner rather than later..
5529 	 */
5530 	an = NULL;
5531 	if (pol == NULL) {
5532 		ap = act;
5533 	} else {
5534 		ap = pol->ipsp_act;
5535 
5536 		if (ap != NULL)
5537 			an = ap->ipa_next;
5538 	}
5539 
5540 	/*
5541 	 * Just take a swag for the allocation for now.	 We can always
5542 	 * alter it later.
5543 	 */
5544 #define	SADB_EXTENDED_ACQUIRE_SIZE	4096
5545 	mp = allocb(SADB_EXTENDED_ACQUIRE_SIZE, BPRI_HI);
5546 	if (mp == NULL)
5547 		return (NULL);
5548 
5549 	start = mp->b_rptr;
5550 	end = start + SADB_EXTENDED_ACQUIRE_SIZE;
5551 
5552 	cur = start;
5553 
5554 	samsg = (sadb_msg_t *)cur;
5555 	cur += sizeof (*samsg);
5556 
5557 	samsg->sadb_msg_version = PF_KEY_V2;
5558 	samsg->sadb_msg_type = SADB_ACQUIRE;
5559 	samsg->sadb_msg_errno = 0;
5560 	samsg->sadb_msg_reserved = 0;
5561 	samsg->sadb_msg_satype = 0;
5562 	samsg->sadb_msg_seq = seq;
5563 	samsg->sadb_msg_pid = pid;
5564 
5565 	if (tunnel_mode) {
5566 		/*
5567 		 * Form inner address extensions based NOT on the inner
5568 		 * selectors (i.e. the packet data), but on the policy's
5569 		 * selector key (i.e. the policy's selector information).
5570 		 *
5571 		 * NOTE:  The position of IPv4 and IPv6 addresses is the
5572 		 * same in ipsec_selkey_t (unless the compiler does very
5573 		 * strange things with unions, consult your local C language
5574 		 * lawyer for details).
5575 		 */
5576 		ASSERT(pol != NULL);
5577 
5578 		ipsl = &(pol->ipsp_sel->ipsl_key);
5579 		if (ipsl->ipsl_valid & IPSL_IPV4) {
5580 			af = AF_INET;
5581 			ASSERT(sel->ips_protocol == IPPROTO_ENCAP);
5582 			ASSERT(!(ipsl->ipsl_valid & IPSL_IPV6));
5583 		} else {
5584 			af = AF_INET6;
5585 			ASSERT(sel->ips_protocol == IPPROTO_IPV6);
5586 			ASSERT(ipsl->ipsl_valid & IPSL_IPV6);
5587 		}
5588 
5589 		if (ipsl->ipsl_valid & IPSL_LOCAL_ADDR) {
5590 			saddrptr = (uint32_t *)(&ipsl->ipsl_local);
5591 			pfxlen = ipsl->ipsl_local_pfxlen;
5592 		} else {
5593 			saddrptr = (uint32_t *)(&ipv6_all_zeros);
5594 			pfxlen = 0;
5595 		}
5596 		/* XXX What about ICMP type/code? */
5597 		lport = (ipsl->ipsl_valid & IPSL_LOCAL_PORT) ?
5598 		    ipsl->ipsl_lport : 0;
5599 		proto = (ipsl->ipsl_valid & IPSL_PROTOCOL) ?
5600 		    ipsl->ipsl_proto : 0;
5601 
5602 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_SRC,
5603 		    af, saddrptr, lport, proto, pfxlen);
5604 		if (cur == NULL) {
5605 			freeb(mp);
5606 			return (NULL);
5607 		}
5608 
5609 		if (ipsl->ipsl_valid & IPSL_REMOTE_ADDR) {
5610 			daddrptr = (uint32_t *)(&ipsl->ipsl_remote);
5611 			pfxlen = ipsl->ipsl_remote_pfxlen;
5612 		} else {
5613 			daddrptr = (uint32_t *)(&ipv6_all_zeros);
5614 			pfxlen = 0;
5615 		}
5616 		/* XXX What about ICMP type/code? */
5617 		rport = (ipsl->ipsl_valid & IPSL_REMOTE_PORT) ?
5618 		    ipsl->ipsl_rport : 0;
5619 
5620 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_DST,
5621 		    af, daddrptr, rport, proto, pfxlen);
5622 		if (cur == NULL) {
5623 			freeb(mp);
5624 			return (NULL);
5625 		}
5626 		/*
5627 		 * TODO  - if we go to 3408's dream of transport mode IP-in-IP
5628 		 * _with_ inner-packet address selectors, we'll need to further
5629 		 * distinguish tunnel mode here.  For now, having inner
5630 		 * addresses and/or ports is sufficient.
5631 		 *
5632 		 * Meanwhile, whack proto/ports to reflect IP-in-IP for the
5633 		 * outer addresses.
5634 		 */
5635 		proto = sel->ips_protocol;	/* Either _ENCAP or _IPV6 */
5636 		lport = rport = 0;
5637 	} else if ((ap != NULL) && (!ap->ipa_want_unique)) {
5638 		proto = 0;
5639 		lport = 0;
5640 		rport = 0;
5641 		if (pol != NULL) {
5642 			ipsl = &(pol->ipsp_sel->ipsl_key);
5643 			if (ipsl->ipsl_valid & IPSL_PROTOCOL)
5644 				proto = ipsl->ipsl_proto;
5645 			if (ipsl->ipsl_valid & IPSL_REMOTE_PORT)
5646 				rport = ipsl->ipsl_rport;
5647 			if (ipsl->ipsl_valid & IPSL_LOCAL_PORT)
5648 				lport = ipsl->ipsl_lport;
5649 		}
5650 	} else {
5651 		proto = sel->ips_protocol;
5652 		lport = sel->ips_local_port;
5653 		rport = sel->ips_remote_port;
5654 	}
5655 
5656 	af = sel->ips_isv4 ? AF_INET : AF_INET6;
5657 
5658 	/*
5659 	 * NOTE:  The position of IPv4 and IPv6 addresses is the same in
5660 	 * ipsec_selector_t.
5661 	 */
5662 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_SRC, af,
5663 	    (uint32_t *)(&sel->ips_local_addr_v6), lport, proto, 0);
5664 
5665 	if (cur == NULL) {
5666 		freeb(mp);
5667 		return (NULL);
5668 	}
5669 
5670 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_DST, af,
5671 	    (uint32_t *)(&sel->ips_remote_addr_v6), rport, proto, 0);
5672 
5673 	if (cur == NULL) {
5674 		freeb(mp);
5675 		return (NULL);
5676 	}
5677 
5678 	/*
5679 	 * This section will change a lot as policy evolves.
5680 	 * For now, it'll be relatively simple.
5681 	 */
5682 	eprop = (sadb_prop_t *)cur;
5683 	cur += sizeof (*eprop);
5684 	if (cur > end) {
5685 		/* no space left */
5686 		freeb(mp);
5687 		return (NULL);
5688 	}
5689 
5690 	eprop->sadb_prop_exttype = SADB_X_EXT_EPROP;
5691 	eprop->sadb_x_prop_ereserved = 0;
5692 	eprop->sadb_x_prop_numecombs = 0;
5693 	eprop->sadb_prop_replay = 32;	/* default */
5694 
5695 	kmc = kmp = 0;
5696 
5697 	for (; ap != NULL; ap = an) {
5698 		an = (pol != NULL) ? ap->ipa_next : NULL;
5699 
5700 		/*
5701 		 * Skip non-IPsec policies
5702 		 */
5703 		if (ap->ipa_act.ipa_type != IPSEC_ACT_APPLY)
5704 			continue;
5705 
5706 		if (ap->ipa_act.ipa_apply.ipp_km_proto)
5707 			kmp = ap->ipa_act.ipa_apply.ipp_km_proto;
5708 		if (ap->ipa_act.ipa_apply.ipp_km_cookie)
5709 			kmc = ap->ipa_act.ipa_apply.ipp_km_cookie;
5710 		if (ap->ipa_act.ipa_apply.ipp_replay_depth) {
5711 			eprop->sadb_prop_replay =
5712 			    ap->ipa_act.ipa_apply.ipp_replay_depth;
5713 		}
5714 
5715 		cur = sadb_action_to_ecomb(cur, end, ap, ns);
5716 		if (cur == NULL) { /* no space */
5717 			freeb(mp);
5718 			return (NULL);
5719 		}
5720 		eprop->sadb_x_prop_numecombs++;
5721 	}
5722 
5723 	if (eprop->sadb_x_prop_numecombs == 0) {
5724 		/*
5725 		 * This will happen if we fail to find a policy
5726 		 * allowing for IPsec processing.
5727 		 * Construct an error message.
5728 		 */
5729 		samsg->sadb_msg_len = SADB_8TO64(sizeof (*samsg));
5730 		samsg->sadb_msg_errno = ENOENT;
5731 		samsg->sadb_x_msg_diagnostic = 0;
5732 		return (mp);
5733 	}
5734 
5735 	if ((kmp != 0) || (kmc != 0)) {
5736 		cur = sadb_make_kmc_ext(cur, end, kmp, kmc);
5737 		if (cur == NULL) {
5738 			freeb(mp);
5739 			return (NULL);
5740 		}
5741 	}
5742 
5743 	eprop->sadb_prop_len = SADB_8TO64(cur - (uint8_t *)eprop);
5744 	samsg->sadb_msg_len = SADB_8TO64(cur - start);
5745 	mp->b_wptr = cur;
5746 
5747 	return (mp);
5748 }
5749 
5750 /*
5751  * Generic setup of an RFC 2367 ACQUIRE message.  Caller sets satype.
5752  *
5753  * NOTE: This function acquires alg_lock as a side-effect if-and-only-if we
5754  * succeed (i.e. return non-NULL).  Caller MUST release it.  This is to
5755  * maximize code consolidation while preventing algorithm changes from messing
5756  * with the callers finishing touches on the ACQUIRE itself.
5757  */
5758 mblk_t *
5759 sadb_setup_acquire(ipsacq_t *acqrec, uint8_t satype, ipsec_stack_t *ipss)
5760 {
5761 	uint_t allocsize;
5762 	mblk_t *pfkeymp, *msgmp;
5763 	sa_family_t af;
5764 	uint8_t *cur, *end;
5765 	sadb_msg_t *samsg;
5766 	uint16_t sport_typecode;
5767 	uint16_t dport_typecode;
5768 	uint8_t check_proto;
5769 	boolean_t tunnel_mode = (acqrec->ipsacq_inneraddrfam != 0);
5770 
5771 	ASSERT(MUTEX_HELD(&acqrec->ipsacq_lock));
5772 
5773 	pfkeymp = sadb_keysock_out(0);
5774 	if (pfkeymp == NULL)
5775 		return (NULL);
5776 
5777 	/*
5778 	 * First, allocate a basic ACQUIRE message
5779 	 */
5780 	allocsize = sizeof (sadb_msg_t) + sizeof (sadb_address_t) +
5781 	    sizeof (sadb_address_t) + sizeof (sadb_prop_t);
5782 
5783 	/* Make sure there's enough to cover both AF_INET and AF_INET6. */
5784 	allocsize += 2 * sizeof (struct sockaddr_in6);
5785 
5786 	mutex_enter(&ipss->ipsec_alg_lock);
5787 	/* NOTE:  The lock is now held through to this function's return. */
5788 	allocsize += ipss->ipsec_nalgs[IPSEC_ALG_AUTH] *
5789 	    ipss->ipsec_nalgs[IPSEC_ALG_ENCR] * sizeof (sadb_comb_t);
5790 
5791 	if (tunnel_mode) {
5792 		/* Tunnel mode! */
5793 		allocsize += 2 * sizeof (sadb_address_t);
5794 		/* Enough to cover both AF_INET and AF_INET6. */
5795 		allocsize += 2 * sizeof (struct sockaddr_in6);
5796 	}
5797 
5798 	msgmp = allocb(allocsize, BPRI_HI);
5799 	if (msgmp == NULL) {
5800 		freeb(pfkeymp);
5801 		mutex_exit(&ipss->ipsec_alg_lock);
5802 		return (NULL);
5803 	}
5804 
5805 	pfkeymp->b_cont = msgmp;
5806 	cur = msgmp->b_rptr;
5807 	end = cur + allocsize;
5808 	samsg = (sadb_msg_t *)cur;
5809 	cur += sizeof (sadb_msg_t);
5810 
5811 	af = acqrec->ipsacq_addrfam;
5812 	switch (af) {
5813 	case AF_INET:
5814 		check_proto = IPPROTO_ICMP;
5815 		break;
5816 	case AF_INET6:
5817 		check_proto = IPPROTO_ICMPV6;
5818 		break;
5819 	default:
5820 		/* This should never happen unless we have kernel bugs. */
5821 		cmn_err(CE_WARN,
5822 		    "sadb_setup_acquire:  corrupt ACQUIRE record.\n");
5823 		ASSERT(0);
5824 		mutex_exit(&ipss->ipsec_alg_lock);
5825 		return (NULL);
5826 	}
5827 
5828 	samsg->sadb_msg_version = PF_KEY_V2;
5829 	samsg->sadb_msg_type = SADB_ACQUIRE;
5830 	samsg->sadb_msg_satype = satype;
5831 	samsg->sadb_msg_errno = 0;
5832 	samsg->sadb_msg_pid = 0;
5833 	samsg->sadb_msg_reserved = 0;
5834 	samsg->sadb_msg_seq = acqrec->ipsacq_seq;
5835 
5836 	ASSERT(MUTEX_HELD(&acqrec->ipsacq_lock));
5837 
5838 	if ((acqrec->ipsacq_proto == check_proto) || tunnel_mode) {
5839 		sport_typecode = dport_typecode = 0;
5840 	} else {
5841 		sport_typecode = acqrec->ipsacq_srcport;
5842 		dport_typecode = acqrec->ipsacq_dstport;
5843 	}
5844 
5845 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_SRC, af,
5846 	    acqrec->ipsacq_srcaddr, sport_typecode, acqrec->ipsacq_proto, 0);
5847 
5848 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_DST, af,
5849 	    acqrec->ipsacq_dstaddr, dport_typecode, acqrec->ipsacq_proto, 0);
5850 
5851 	if (tunnel_mode) {
5852 		sport_typecode = acqrec->ipsacq_srcport;
5853 		dport_typecode = acqrec->ipsacq_dstport;
5854 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_SRC,
5855 		    acqrec->ipsacq_inneraddrfam, acqrec->ipsacq_innersrc,
5856 		    sport_typecode, acqrec->ipsacq_inner_proto,
5857 		    acqrec->ipsacq_innersrcpfx);
5858 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_DST,
5859 		    acqrec->ipsacq_inneraddrfam, acqrec->ipsacq_innerdst,
5860 		    dport_typecode, acqrec->ipsacq_inner_proto,
5861 		    acqrec->ipsacq_innerdstpfx);
5862 	}
5863 
5864 	/* XXX Insert identity information here. */
5865 
5866 	/* XXXMLS Insert sensitivity information here. */
5867 
5868 	if (cur != NULL)
5869 		samsg->sadb_msg_len = SADB_8TO64(cur - msgmp->b_rptr);
5870 	else
5871 		mutex_exit(&ipss->ipsec_alg_lock);
5872 
5873 	return (pfkeymp);
5874 }
5875 
5876 /*
5877  * Given an SADB_GETSPI message, find an appropriately ranged SA and
5878  * allocate an SA.  If there are message improprieties, return (ipsa_t *)-1.
5879  * If there was a memory allocation error, return NULL.	 (Assume NULL !=
5880  * (ipsa_t *)-1).
5881  *
5882  * master_spi is passed in host order.
5883  */
5884 ipsa_t *
5885 sadb_getspi(keysock_in_t *ksi, uint32_t master_spi, int *diagnostic,
5886     netstack_t *ns, uint_t sa_type)
5887 {
5888 	sadb_address_t *src =
5889 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC],
5890 	    *dst = (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
5891 	sadb_spirange_t *range =
5892 	    (sadb_spirange_t *)ksi->ks_in_extv[SADB_EXT_SPIRANGE];
5893 	struct sockaddr_in *ssa, *dsa;
5894 	struct sockaddr_in6 *ssa6, *dsa6;
5895 	uint32_t *srcaddr, *dstaddr;
5896 	sa_family_t af;
5897 	uint32_t add, min, max;
5898 	uint8_t protocol =
5899 	    (sa_type == SADB_SATYPE_AH) ? IPPROTO_AH : IPPROTO_ESP;
5900 
5901 	if (src == NULL) {
5902 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
5903 		return ((ipsa_t *)-1);
5904 	}
5905 	if (dst == NULL) {
5906 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
5907 		return ((ipsa_t *)-1);
5908 	}
5909 	if (range == NULL) {
5910 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_RANGE;
5911 		return ((ipsa_t *)-1);
5912 	}
5913 
5914 	min = ntohl(range->sadb_spirange_min);
5915 	max = ntohl(range->sadb_spirange_max);
5916 	dsa = (struct sockaddr_in *)(dst + 1);
5917 	dsa6 = (struct sockaddr_in6 *)dsa;
5918 
5919 	ssa = (struct sockaddr_in *)(src + 1);
5920 	ssa6 = (struct sockaddr_in6 *)ssa;
5921 	ASSERT(dsa->sin_family == ssa->sin_family);
5922 
5923 	srcaddr = ALL_ZEROES_PTR;
5924 	af = dsa->sin_family;
5925 	switch (af) {
5926 	case AF_INET:
5927 		if (src != NULL)
5928 			srcaddr = (uint32_t *)(&ssa->sin_addr);
5929 		dstaddr = (uint32_t *)(&dsa->sin_addr);
5930 		break;
5931 	case AF_INET6:
5932 		if (src != NULL)
5933 			srcaddr = (uint32_t *)(&ssa6->sin6_addr);
5934 		dstaddr = (uint32_t *)(&dsa6->sin6_addr);
5935 		break;
5936 	default:
5937 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_DST_AF;
5938 		return ((ipsa_t *)-1);
5939 	}
5940 
5941 	if (master_spi < min || master_spi > max) {
5942 		/* Return a random value in the range. */
5943 		if (cl_inet_getspi) {
5944 			cl_inet_getspi(ns->netstack_stackid, protocol,
5945 			    (uint8_t *)&add, sizeof (add), NULL);
5946 		} else {
5947 			(void) random_get_pseudo_bytes((uint8_t *)&add,
5948 			    sizeof (add));
5949 		}
5950 		master_spi = min + (add % (max - min + 1));
5951 	}
5952 
5953 	/*
5954 	 * Since master_spi is passed in host order, we need to htonl() it
5955 	 * for the purposes of creating a new SA.
5956 	 */
5957 	return (sadb_makelarvalassoc(htonl(master_spi), srcaddr, dstaddr, af,
5958 	    ns));
5959 }
5960 
5961 /*
5962  *
5963  * Locate an ACQUIRE and nuke it.  If I have an samsg that's larger than the
5964  * base header, just ignore it.	 Otherwise, lock down the whole ACQUIRE list
5965  * and scan for the sequence number in question.  I may wish to accept an
5966  * address pair with it, for easier searching.
5967  *
5968  * Caller frees the message, so we don't have to here.
5969  *
5970  * NOTE:	The ip_q parameter may be used in the future for ACQUIRE
5971  *		failures.
5972  */
5973 /* ARGSUSED */
5974 void
5975 sadb_in_acquire(sadb_msg_t *samsg, sadbp_t *sp, queue_t *ip_q, netstack_t *ns)
5976 {
5977 	int i;
5978 	ipsacq_t *acqrec;
5979 	iacqf_t *bucket;
5980 
5981 	/*
5982 	 * I only accept the base header for this!
5983 	 * Though to be honest, requiring the dst address would help
5984 	 * immensely.
5985 	 *
5986 	 * XXX	There are already cases where I can get the dst address.
5987 	 */
5988 	if (samsg->sadb_msg_len > SADB_8TO64(sizeof (*samsg)))
5989 		return;
5990 
5991 	/*
5992 	 * Using the samsg->sadb_msg_seq, find the ACQUIRE record, delete it,
5993 	 * (and in the future send a message to IP with the appropriate error
5994 	 * number).
5995 	 *
5996 	 * Q: Do I want to reject if pid != 0?
5997 	 */
5998 
5999 	for (i = 0; i < sp->s_v4.sdb_hashsize; i++) {
6000 		bucket = &sp->s_v4.sdb_acq[i];
6001 		mutex_enter(&bucket->iacqf_lock);
6002 		for (acqrec = bucket->iacqf_ipsacq; acqrec != NULL;
6003 		    acqrec = acqrec->ipsacq_next) {
6004 			if (samsg->sadb_msg_seq == acqrec->ipsacq_seq)
6005 				break;	/* for acqrec... loop. */
6006 		}
6007 		if (acqrec != NULL)
6008 			break;	/* for i = 0... loop. */
6009 
6010 		mutex_exit(&bucket->iacqf_lock);
6011 	}
6012 
6013 	if (acqrec == NULL) {
6014 		for (i = 0; i < sp->s_v6.sdb_hashsize; i++) {
6015 			bucket = &sp->s_v6.sdb_acq[i];
6016 			mutex_enter(&bucket->iacqf_lock);
6017 			for (acqrec = bucket->iacqf_ipsacq; acqrec != NULL;
6018 			    acqrec = acqrec->ipsacq_next) {
6019 				if (samsg->sadb_msg_seq == acqrec->ipsacq_seq)
6020 					break;	/* for acqrec... loop. */
6021 			}
6022 			if (acqrec != NULL)
6023 				break;	/* for i = 0... loop. */
6024 
6025 			mutex_exit(&bucket->iacqf_lock);
6026 		}
6027 	}
6028 
6029 
6030 	if (acqrec == NULL)
6031 		return;
6032 
6033 	/*
6034 	 * What do I do with the errno and IP?	I may need mp's services a
6035 	 * little more.	 See sadb_destroy_acquire() for future directions
6036 	 * beyond free the mblk chain on the acquire record.
6037 	 */
6038 
6039 	ASSERT(&bucket->iacqf_lock == acqrec->ipsacq_linklock);
6040 	sadb_destroy_acquire(acqrec, ns);
6041 	/* Have to exit mutex here, because of breaking out of for loop. */
6042 	mutex_exit(&bucket->iacqf_lock);
6043 }
6044 
6045 /*
6046  * The following functions work with the replay windows of an SA.  They assume
6047  * the ipsa->ipsa_replay_arr is an array of uint64_t, and that the bit vector
6048  * represents the highest sequence number packet received, and back
6049  * (ipsa->ipsa_replay_wsize) packets.
6050  */
6051 
6052 /*
6053  * Is the replay bit set?
6054  */
6055 static boolean_t
6056 ipsa_is_replay_set(ipsa_t *ipsa, uint32_t offset)
6057 {
6058 	uint64_t bit = (uint64_t)1 << (uint64_t)(offset & 63);
6059 
6060 	return ((bit & ipsa->ipsa_replay_arr[offset >> 6]) ? B_TRUE : B_FALSE);
6061 }
6062 
6063 /*
6064  * Shift the bits of the replay window over.
6065  */
6066 static void
6067 ipsa_shift_replay(ipsa_t *ipsa, uint32_t shift)
6068 {
6069 	int i;
6070 	int jump = ((shift - 1) >> 6) + 1;
6071 
6072 	if (shift == 0)
6073 		return;
6074 
6075 	for (i = (ipsa->ipsa_replay_wsize - 1) >> 6; i >= 0; i--) {
6076 		if (i + jump <= (ipsa->ipsa_replay_wsize - 1) >> 6) {
6077 			ipsa->ipsa_replay_arr[i + jump] |=
6078 			    ipsa->ipsa_replay_arr[i] >> (64 - (shift & 63));
6079 		}
6080 		ipsa->ipsa_replay_arr[i] <<= shift;
6081 	}
6082 }
6083 
6084 /*
6085  * Set a bit in the bit vector.
6086  */
6087 static void
6088 ipsa_set_replay(ipsa_t *ipsa, uint32_t offset)
6089 {
6090 	uint64_t bit = (uint64_t)1 << (uint64_t)(offset & 63);
6091 
6092 	ipsa->ipsa_replay_arr[offset >> 6] |= bit;
6093 }
6094 
6095 #define	SADB_MAX_REPLAY_VALUE 0xffffffff
6096 
6097 /*
6098  * Assume caller has NOT done ntohl() already on seq.  Check to see
6099  * if replay sequence number "seq" has been seen already.
6100  */
6101 boolean_t
6102 sadb_replay_check(ipsa_t *ipsa, uint32_t seq)
6103 {
6104 	boolean_t rc;
6105 	uint32_t diff;
6106 
6107 	if (ipsa->ipsa_replay_wsize == 0)
6108 		return (B_TRUE);
6109 
6110 	/*
6111 	 * NOTE:  I've already checked for 0 on the wire in sadb_replay_peek().
6112 	 */
6113 
6114 	/* Convert sequence number into host order before holding the mutex. */
6115 	seq = ntohl(seq);
6116 
6117 	mutex_enter(&ipsa->ipsa_lock);
6118 
6119 	/* Initialize inbound SA's ipsa_replay field to last one received. */
6120 	if (ipsa->ipsa_replay == 0)
6121 		ipsa->ipsa_replay = 1;
6122 
6123 	if (seq > ipsa->ipsa_replay) {
6124 		/*
6125 		 * I have received a new "highest value received".  Shift
6126 		 * the replay window over.
6127 		 */
6128 		diff = seq - ipsa->ipsa_replay;
6129 		if (diff < ipsa->ipsa_replay_wsize) {
6130 			/* In replay window, shift bits over. */
6131 			ipsa_shift_replay(ipsa, diff);
6132 		} else {
6133 			/* WAY FAR AHEAD, clear bits and start again. */
6134 			bzero(ipsa->ipsa_replay_arr,
6135 			    sizeof (ipsa->ipsa_replay_arr));
6136 		}
6137 		ipsa_set_replay(ipsa, 0);
6138 		ipsa->ipsa_replay = seq;
6139 		rc = B_TRUE;
6140 		goto done;
6141 	}
6142 	diff = ipsa->ipsa_replay - seq;
6143 	if (diff >= ipsa->ipsa_replay_wsize || ipsa_is_replay_set(ipsa, diff)) {
6144 		rc = B_FALSE;
6145 		goto done;
6146 	}
6147 	/* Set this packet as seen. */
6148 	ipsa_set_replay(ipsa, diff);
6149 
6150 	rc = B_TRUE;
6151 done:
6152 	mutex_exit(&ipsa->ipsa_lock);
6153 	return (rc);
6154 }
6155 
6156 /*
6157  * "Peek" and see if we should even bother going through the effort of
6158  * running an authentication check on the sequence number passed in.
6159  * this takes into account packets that are below the replay window,
6160  * and collisions with already replayed packets.  Return B_TRUE if it
6161  * is okay to proceed, B_FALSE if this packet should be dropped immediately.
6162  * Assume same byte-ordering as sadb_replay_check.
6163  */
6164 boolean_t
6165 sadb_replay_peek(ipsa_t *ipsa, uint32_t seq)
6166 {
6167 	boolean_t rc = B_FALSE;
6168 	uint32_t diff;
6169 
6170 	if (ipsa->ipsa_replay_wsize == 0)
6171 		return (B_TRUE);
6172 
6173 	/*
6174 	 * 0 is 0, regardless of byte order... :)
6175 	 *
6176 	 * If I get 0 on the wire (and there is a replay window) then the
6177 	 * sender most likely wrapped.	This ipsa may need to be marked or
6178 	 * something.
6179 	 */
6180 	if (seq == 0)
6181 		return (B_FALSE);
6182 
6183 	seq = ntohl(seq);
6184 	mutex_enter(&ipsa->ipsa_lock);
6185 	if (seq < ipsa->ipsa_replay - ipsa->ipsa_replay_wsize &&
6186 	    ipsa->ipsa_replay >= ipsa->ipsa_replay_wsize)
6187 		goto done;
6188 
6189 	/*
6190 	 * If I've hit 0xffffffff, then quite honestly, I don't need to
6191 	 * bother with formalities.  I'm not accepting any more packets
6192 	 * on this SA.
6193 	 */
6194 	if (ipsa->ipsa_replay == SADB_MAX_REPLAY_VALUE) {
6195 		/*
6196 		 * Since we're already holding the lock, update the
6197 		 * expire time ala. sadb_replay_delete() and return.
6198 		 */
6199 		ipsa->ipsa_hardexpiretime = (time_t)1;
6200 		goto done;
6201 	}
6202 
6203 	if (seq <= ipsa->ipsa_replay) {
6204 		/*
6205 		 * This seq is in the replay window.  I'm not below it,
6206 		 * because I already checked for that above!
6207 		 */
6208 		diff = ipsa->ipsa_replay - seq;
6209 		if (ipsa_is_replay_set(ipsa, diff))
6210 			goto done;
6211 	}
6212 	/* Else return B_TRUE, I'm going to advance the window. */
6213 
6214 	rc = B_TRUE;
6215 done:
6216 	mutex_exit(&ipsa->ipsa_lock);
6217 	return (rc);
6218 }
6219 
6220 /*
6221  * Delete a single SA.
6222  *
6223  * For now, use the quick-and-dirty trick of making the association's
6224  * hard-expire lifetime (time_t)1, ensuring deletion by the *_ager().
6225  */
6226 void
6227 sadb_replay_delete(ipsa_t *assoc)
6228 {
6229 	mutex_enter(&assoc->ipsa_lock);
6230 	assoc->ipsa_hardexpiretime = (time_t)1;
6231 	mutex_exit(&assoc->ipsa_lock);
6232 }
6233 
6234 /*
6235  * Given a queue that presumably points to IP, send a T_BIND_REQ for _proto_
6236  * down.  The caller will handle the T_BIND_ACK locally.
6237  */
6238 boolean_t
6239 sadb_t_bind_req(queue_t *q, int proto)
6240 {
6241 	struct T_bind_req *tbr;
6242 	mblk_t *mp;
6243 
6244 	mp = allocb_cred(sizeof (struct T_bind_req) + 1, kcred, NOPID);
6245 	if (mp == NULL) {
6246 		/* cmn_err(CE_WARN, */
6247 		/* "sadb_t_bind_req(%d): couldn't allocate mblk\n", proto); */
6248 		return (B_FALSE);
6249 	}
6250 	mp->b_datap->db_type = M_PCPROTO;
6251 	tbr = (struct T_bind_req *)mp->b_rptr;
6252 	mp->b_wptr += sizeof (struct T_bind_req);
6253 	tbr->PRIM_type = T_BIND_REQ;
6254 	tbr->ADDR_length = 0;
6255 	tbr->ADDR_offset = 0;
6256 	tbr->CONIND_number = 0;
6257 	*mp->b_wptr = (uint8_t)proto;
6258 	mp->b_wptr++;
6259 
6260 	putnext(q, mp);
6261 	return (B_TRUE);
6262 }
6263 
6264 /*
6265  * Special front-end to ipsec_rl_strlog() dealing with SA failure.
6266  * this is designed to take only a format string with "* %x * %s *", so
6267  * that "spi" is printed first, then "addr" is converted using inet_pton().
6268  *
6269  * This is abstracted out to save the stack space for only when inet_pton()
6270  * is called.  Make sure "spi" is in network order; it usually is when this
6271  * would get called.
6272  */
6273 void
6274 ipsec_assocfailure(short mid, short sid, char level, ushort_t sl, char *fmt,
6275     uint32_t spi, void *addr, int af, netstack_t *ns)
6276 {
6277 	char buf[INET6_ADDRSTRLEN];
6278 
6279 	ASSERT(af == AF_INET6 || af == AF_INET);
6280 
6281 	ipsec_rl_strlog(ns, mid, sid, level, sl, fmt, ntohl(spi),
6282 	    inet_ntop(af, addr, buf, sizeof (buf)));
6283 }
6284 
6285 /*
6286  * Fills in a reference to the policy, if any, from the conn, in *ppp
6287  * Releases a reference to the passed conn_t.
6288  */
6289 static void
6290 ipsec_conn_pol(ipsec_selector_t *sel, conn_t *connp, ipsec_policy_t **ppp)
6291 {
6292 	ipsec_policy_t	*pp;
6293 	ipsec_latch_t	*ipl = connp->conn_latch;
6294 
6295 	if ((ipl != NULL) && (ipl->ipl_out_policy != NULL)) {
6296 		pp = ipl->ipl_out_policy;
6297 		IPPOL_REFHOLD(pp);
6298 	} else {
6299 		pp = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, NULL, sel,
6300 		    connp->conn_netstack);
6301 	}
6302 	*ppp = pp;
6303 	CONN_DEC_REF(connp);
6304 }
6305 
6306 /*
6307  * The following functions scan through active conn_t structures
6308  * and return a reference to the best-matching policy it can find.
6309  * Caller must release the reference.
6310  */
6311 static void
6312 ipsec_udp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp, ip_stack_t *ipst)
6313 {
6314 	connf_t *connfp;
6315 	conn_t *connp = NULL;
6316 	ipsec_selector_t portonly;
6317 
6318 	bzero((void *)&portonly, sizeof (portonly));
6319 
6320 	if (sel->ips_local_port == 0)
6321 		return;
6322 
6323 	connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(sel->ips_local_port,
6324 	    ipst)];
6325 	mutex_enter(&connfp->connf_lock);
6326 
6327 	if (sel->ips_isv4) {
6328 		connp = connfp->connf_head;
6329 		while (connp != NULL) {
6330 			if (IPCL_UDP_MATCH(connp, sel->ips_local_port,
6331 			    sel->ips_local_addr_v4, sel->ips_remote_port,
6332 			    sel->ips_remote_addr_v4))
6333 				break;
6334 			connp = connp->conn_next;
6335 		}
6336 
6337 		if (connp == NULL) {
6338 			/* Try port-only match in IPv6. */
6339 			portonly.ips_local_port = sel->ips_local_port;
6340 			sel = &portonly;
6341 		}
6342 	}
6343 
6344 	if (connp == NULL) {
6345 		connp = connfp->connf_head;
6346 		while (connp != NULL) {
6347 			if (IPCL_UDP_MATCH_V6(connp, sel->ips_local_port,
6348 			    sel->ips_local_addr_v6, sel->ips_remote_port,
6349 			    sel->ips_remote_addr_v6))
6350 				break;
6351 			connp = connp->conn_next;
6352 		}
6353 
6354 		if (connp == NULL) {
6355 			mutex_exit(&connfp->connf_lock);
6356 			return;
6357 		}
6358 	}
6359 
6360 	CONN_INC_REF(connp);
6361 	mutex_exit(&connfp->connf_lock);
6362 
6363 	ipsec_conn_pol(sel, connp, ppp);
6364 }
6365 
6366 static conn_t *
6367 ipsec_find_listen_conn(uint16_t *pptr, ipsec_selector_t *sel, ip_stack_t *ipst)
6368 {
6369 	connf_t *connfp;
6370 	conn_t *connp = NULL;
6371 	const in6_addr_t *v6addrmatch = &sel->ips_local_addr_v6;
6372 
6373 	if (sel->ips_local_port == 0)
6374 		return (NULL);
6375 
6376 	connfp = &ipst->ips_ipcl_bind_fanout[
6377 	    IPCL_BIND_HASH(sel->ips_local_port, ipst)];
6378 	mutex_enter(&connfp->connf_lock);
6379 
6380 	if (sel->ips_isv4) {
6381 		connp = connfp->connf_head;
6382 		while (connp != NULL) {
6383 			if (IPCL_BIND_MATCH(connp, IPPROTO_TCP,
6384 			    sel->ips_local_addr_v4, pptr[1]))
6385 				break;
6386 			connp = connp->conn_next;
6387 		}
6388 
6389 		if (connp == NULL) {
6390 			/* Match to all-zeroes. */
6391 			v6addrmatch = &ipv6_all_zeros;
6392 		}
6393 	}
6394 
6395 	if (connp == NULL) {
6396 		connp = connfp->connf_head;
6397 		while (connp != NULL) {
6398 			if (IPCL_BIND_MATCH_V6(connp, IPPROTO_TCP,
6399 			    *v6addrmatch, pptr[1]))
6400 				break;
6401 			connp = connp->conn_next;
6402 		}
6403 
6404 		if (connp == NULL) {
6405 			mutex_exit(&connfp->connf_lock);
6406 			return (NULL);
6407 		}
6408 	}
6409 
6410 	CONN_INC_REF(connp);
6411 	mutex_exit(&connfp->connf_lock);
6412 	return (connp);
6413 }
6414 
6415 static void
6416 ipsec_tcp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp, ip_stack_t *ipst)
6417 {
6418 	connf_t 	*connfp;
6419 	conn_t		*connp;
6420 	uint32_t	ports;
6421 	uint16_t	*pptr = (uint16_t *)&ports;
6422 
6423 	/*
6424 	 * Find TCP state in the following order:
6425 	 * 1.) Connected conns.
6426 	 * 2.) Listeners.
6427 	 *
6428 	 * Even though #2 will be the common case for inbound traffic, only
6429 	 * following this order insures correctness.
6430 	 */
6431 
6432 	if (sel->ips_local_port == 0)
6433 		return;
6434 
6435 	/*
6436 	 * 0 should be fport, 1 should be lport.  SRC is the local one here.
6437 	 * See ipsec_construct_inverse_acquire() for details.
6438 	 */
6439 	pptr[0] = sel->ips_remote_port;
6440 	pptr[1] = sel->ips_local_port;
6441 
6442 	connfp = &ipst->ips_ipcl_conn_fanout[
6443 	    IPCL_CONN_HASH(sel->ips_remote_addr_v4, ports, ipst)];
6444 	mutex_enter(&connfp->connf_lock);
6445 	connp = connfp->connf_head;
6446 
6447 	if (sel->ips_isv4) {
6448 		while (connp != NULL) {
6449 			if (IPCL_CONN_MATCH(connp, IPPROTO_TCP,
6450 			    sel->ips_remote_addr_v4, sel->ips_local_addr_v4,
6451 			    ports))
6452 				break;
6453 			connp = connp->conn_next;
6454 		}
6455 	} else {
6456 		while (connp != NULL) {
6457 			if (IPCL_CONN_MATCH_V6(connp, IPPROTO_TCP,
6458 			    sel->ips_remote_addr_v6, sel->ips_local_addr_v6,
6459 			    ports))
6460 				break;
6461 			connp = connp->conn_next;
6462 		}
6463 	}
6464 
6465 	if (connp != NULL) {
6466 		CONN_INC_REF(connp);
6467 		mutex_exit(&connfp->connf_lock);
6468 	} else {
6469 		mutex_exit(&connfp->connf_lock);
6470 
6471 		/* Try the listen hash. */
6472 		if ((connp = ipsec_find_listen_conn(pptr, sel, ipst)) == NULL)
6473 			return;
6474 	}
6475 
6476 	ipsec_conn_pol(sel, connp, ppp);
6477 }
6478 
6479 static void
6480 ipsec_sctp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp,
6481     ip_stack_t *ipst)
6482 {
6483 	conn_t		*connp;
6484 	uint32_t	ports;
6485 	uint16_t	*pptr = (uint16_t *)&ports;
6486 
6487 	/*
6488 	 * Find SCP state in the following order:
6489 	 * 1.) Connected conns.
6490 	 * 2.) Listeners.
6491 	 *
6492 	 * Even though #2 will be the common case for inbound traffic, only
6493 	 * following this order insures correctness.
6494 	 */
6495 
6496 	if (sel->ips_local_port == 0)
6497 		return;
6498 
6499 	/*
6500 	 * 0 should be fport, 1 should be lport.  SRC is the local one here.
6501 	 * See ipsec_construct_inverse_acquire() for details.
6502 	 */
6503 	pptr[0] = sel->ips_remote_port;
6504 	pptr[1] = sel->ips_local_port;
6505 
6506 	if (sel->ips_isv4) {
6507 		in6_addr_t	src, dst;
6508 
6509 		IN6_IPADDR_TO_V4MAPPED(sel->ips_remote_addr_v4, &dst);
6510 		IN6_IPADDR_TO_V4MAPPED(sel->ips_local_addr_v4, &src);
6511 		connp = sctp_find_conn(&dst, &src, ports, ALL_ZONES,
6512 		    ipst->ips_netstack->netstack_sctp);
6513 	} else {
6514 		connp = sctp_find_conn(&sel->ips_remote_addr_v6,
6515 		    &sel->ips_local_addr_v6, ports, ALL_ZONES,
6516 		    ipst->ips_netstack->netstack_sctp);
6517 	}
6518 	if (connp == NULL)
6519 		return;
6520 	ipsec_conn_pol(sel, connp, ppp);
6521 }
6522 
6523 /*
6524  * Fill in a query for the SPD (in "sel") using two PF_KEY address extensions.
6525  * Returns 0 or errno, and always sets *diagnostic to something appropriate
6526  * to PF_KEY.
6527  *
6528  * NOTE:  For right now, this function (and ipsec_selector_t for that matter),
6529  * ignore prefix lengths in the address extension.  Since we match on first-
6530  * entered policies, this shouldn't matter.  Also, since we normalize prefix-
6531  * set addresses to mask out the lower bits, we should get a suitable search
6532  * key for the SPD anyway.  This is the function to change if the assumption
6533  * about suitable search keys is wrong.
6534  */
6535 static int
6536 ipsec_get_inverse_acquire_sel(ipsec_selector_t *sel, sadb_address_t *srcext,
6537     sadb_address_t *dstext, int *diagnostic)
6538 {
6539 	struct sockaddr_in *src, *dst;
6540 	struct sockaddr_in6 *src6, *dst6;
6541 
6542 	*diagnostic = 0;
6543 
6544 	bzero(sel, sizeof (*sel));
6545 	sel->ips_protocol = srcext->sadb_address_proto;
6546 	dst = (struct sockaddr_in *)(dstext + 1);
6547 	if (dst->sin_family == AF_INET6) {
6548 		dst6 = (struct sockaddr_in6 *)dst;
6549 		src6 = (struct sockaddr_in6 *)(srcext + 1);
6550 		if (src6->sin6_family != AF_INET6) {
6551 			*diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
6552 			return (EINVAL);
6553 		}
6554 		sel->ips_remote_addr_v6 = dst6->sin6_addr;
6555 		sel->ips_local_addr_v6 = src6->sin6_addr;
6556 		if (sel->ips_protocol == IPPROTO_ICMPV6) {
6557 			sel->ips_is_icmp_inv_acq = 1;
6558 		} else {
6559 			sel->ips_remote_port = dst6->sin6_port;
6560 			sel->ips_local_port = src6->sin6_port;
6561 		}
6562 		sel->ips_isv4 = B_FALSE;
6563 	} else {
6564 		src = (struct sockaddr_in *)(srcext + 1);
6565 		if (src->sin_family != AF_INET) {
6566 			*diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
6567 			return (EINVAL);
6568 		}
6569 		sel->ips_remote_addr_v4 = dst->sin_addr.s_addr;
6570 		sel->ips_local_addr_v4 = src->sin_addr.s_addr;
6571 		if (sel->ips_protocol == IPPROTO_ICMP) {
6572 			sel->ips_is_icmp_inv_acq = 1;
6573 		} else {
6574 			sel->ips_remote_port = dst->sin_port;
6575 			sel->ips_local_port = src->sin_port;
6576 		}
6577 		sel->ips_isv4 = B_TRUE;
6578 	}
6579 	return (0);
6580 }
6581 
6582 /*
6583  * We have encapsulation.
6584  * - Lookup tun_t by address and look for an associated
6585  *   tunnel policy
6586  * - If there are inner selectors
6587  *   - check ITPF_P_TUNNEL and ITPF_P_ACTIVE
6588  *   - Look up tunnel policy based on selectors
6589  * - Else
6590  *   - Sanity check the negotation
6591  *   - If appropriate, fall through to global policy
6592  */
6593 static int
6594 ipsec_tun_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp,
6595     sadb_address_t *innsrcext, sadb_address_t *inndstext, ipsec_tun_pol_t *itp,
6596     int *diagnostic, netstack_t *ns)
6597 {
6598 	int err;
6599 	ipsec_policy_head_t *polhead;
6600 
6601 	/* Check for inner selectors and act appropriately */
6602 
6603 	if (innsrcext != NULL) {
6604 		/* Inner selectors present */
6605 		ASSERT(inndstext != NULL);
6606 		if ((itp == NULL) ||
6607 		    (itp->itp_flags & (ITPF_P_ACTIVE | ITPF_P_TUNNEL)) !=
6608 		    (ITPF_P_ACTIVE | ITPF_P_TUNNEL)) {
6609 			/*
6610 			 * If inner packet selectors, we must have negotiate
6611 			 * tunnel and active policy.  If the tunnel has
6612 			 * transport-mode policy set on it, or has no policy,
6613 			 * fail.
6614 			 */
6615 			return (ENOENT);
6616 		} else {
6617 			/*
6618 			 * Reset "sel" to indicate inner selectors.  Pass
6619 			 * inner PF_KEY address extensions for this to happen.
6620 			 */
6621 			err = ipsec_get_inverse_acquire_sel(sel,
6622 			    innsrcext, inndstext, diagnostic);
6623 			if (err != 0) {
6624 				ITP_REFRELE(itp, ns);
6625 				return (err);
6626 			}
6627 			/*
6628 			 * Now look for a tunnel policy based on those inner
6629 			 * selectors.  (Common code is below.)
6630 			 */
6631 		}
6632 	} else {
6633 		/* No inner selectors present */
6634 		if ((itp == NULL) || !(itp->itp_flags & ITPF_P_ACTIVE)) {
6635 			/*
6636 			 * Transport mode negotiation with no tunnel policy
6637 			 * configured - return to indicate a global policy
6638 			 * check is needed.
6639 			 */
6640 			if (itp != NULL) {
6641 				ITP_REFRELE(itp, ns);
6642 			}
6643 			return (0);
6644 		} else if (itp->itp_flags & ITPF_P_TUNNEL) {
6645 			/* Tunnel mode set with no inner selectors. */
6646 			ITP_REFRELE(itp, ns);
6647 			return (ENOENT);
6648 		}
6649 		/*
6650 		 * Else, this is a tunnel policy configured with ifconfig(1m)
6651 		 * or "negotiate transport" with ipsecconf(1m).  We have an
6652 		 * itp with policy set based on any match, so don't bother
6653 		 * changing fields in "sel".
6654 		 */
6655 	}
6656 
6657 	ASSERT(itp != NULL);
6658 	polhead = itp->itp_policy;
6659 	ASSERT(polhead != NULL);
6660 	rw_enter(&polhead->iph_lock, RW_READER);
6661 	*ppp = ipsec_find_policy_head(NULL, polhead,
6662 	    IPSEC_TYPE_INBOUND, sel, ns);
6663 	rw_exit(&polhead->iph_lock);
6664 	ITP_REFRELE(itp, ns);
6665 
6666 	/*
6667 	 * Don't default to global if we didn't find a matching policy entry.
6668 	 * Instead, send ENOENT, just like if we hit a transport-mode tunnel.
6669 	 */
6670 	if (*ppp == NULL)
6671 		return (ENOENT);
6672 
6673 	return (0);
6674 }
6675 
6676 static void
6677 ipsec_oth_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp,
6678     ip_stack_t *ipst)
6679 {
6680 	boolean_t	isv4 = sel->ips_isv4;
6681 	connf_t		*connfp;
6682 	conn_t		*connp;
6683 
6684 	if (isv4) {
6685 		connfp = &ipst->ips_ipcl_proto_fanout[sel->ips_protocol];
6686 	} else {
6687 		connfp = &ipst->ips_ipcl_proto_fanout_v6[sel->ips_protocol];
6688 	}
6689 
6690 	mutex_enter(&connfp->connf_lock);
6691 	for (connp = connfp->connf_head; connp != NULL;
6692 	    connp = connp->conn_next) {
6693 		if (!((isv4 && !((connp->conn_src == 0 ||
6694 		    connp->conn_src == sel->ips_local_addr_v4) &&
6695 		    (connp->conn_rem == 0 ||
6696 		    connp->conn_rem == sel->ips_remote_addr_v4))) ||
6697 		    (!isv4 && !((IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) ||
6698 		    IN6_ARE_ADDR_EQUAL(&connp->conn_srcv6,
6699 		    &sel->ips_local_addr_v6)) &&
6700 		    (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6) ||
6701 		    IN6_ARE_ADDR_EQUAL(&connp->conn_remv6,
6702 		    &sel->ips_remote_addr_v6)))))) {
6703 			break;
6704 		}
6705 	}
6706 	if (connp == NULL) {
6707 		mutex_exit(&connfp->connf_lock);
6708 		return;
6709 	}
6710 
6711 	CONN_INC_REF(connp);
6712 	mutex_exit(&connfp->connf_lock);
6713 
6714 	ipsec_conn_pol(sel, connp, ppp);
6715 }
6716 
6717 /*
6718  * Construct an inverse ACQUIRE reply based on:
6719  *
6720  * 1.) Current global policy.
6721  * 2.) An conn_t match depending on what all was passed in the extv[].
6722  * 3.) A tunnel's policy head.
6723  * ...
6724  * N.) Other stuff TBD (e.g. identities)
6725  *
6726  * If there is an error, set sadb_msg_errno and sadb_x_msg_diagnostic
6727  * in this function so the caller can extract them where appropriately.
6728  *
6729  * The SRC address is the local one - just like an outbound ACQUIRE message.
6730  */
6731 mblk_t *
6732 ipsec_construct_inverse_acquire(sadb_msg_t *samsg, sadb_ext_t *extv[],
6733     netstack_t *ns)
6734 {
6735 	int err;
6736 	int diagnostic;
6737 	sadb_address_t *srcext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_SRC],
6738 	    *dstext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_DST],
6739 	    *innsrcext = (sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_SRC],
6740 	    *inndstext = (sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_DST];
6741 	struct sockaddr_in6 *src, *dst;
6742 	struct sockaddr_in6 *isrc, *idst;
6743 	ipsec_tun_pol_t *itp = NULL;
6744 	ipsec_policy_t *pp = NULL;
6745 	ipsec_selector_t sel, isel;
6746 	mblk_t *retmp;
6747 	ip_stack_t	*ipst = ns->netstack_ip;
6748 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
6749 
6750 	/* Normalize addresses */
6751 	if (sadb_addrcheck(NULL, (mblk_t *)samsg, (sadb_ext_t *)srcext, 0, ns)
6752 	    == KS_IN_ADDR_UNKNOWN) {
6753 		err = EINVAL;
6754 		diagnostic = SADB_X_DIAGNOSTIC_BAD_SRC;
6755 		goto bail;
6756 	}
6757 	src = (struct sockaddr_in6 *)(srcext + 1);
6758 	if (sadb_addrcheck(NULL, (mblk_t *)samsg, (sadb_ext_t *)dstext, 0, ns)
6759 	    == KS_IN_ADDR_UNKNOWN) {
6760 		err = EINVAL;
6761 		diagnostic = SADB_X_DIAGNOSTIC_BAD_DST;
6762 		goto bail;
6763 	}
6764 	dst = (struct sockaddr_in6 *)(dstext + 1);
6765 	if (src->sin6_family != dst->sin6_family) {
6766 		err = EINVAL;
6767 		diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
6768 		goto bail;
6769 	}
6770 
6771 	/* Check for tunnel mode and act appropriately */
6772 	if (innsrcext != NULL) {
6773 		if (inndstext == NULL) {
6774 			err = EINVAL;
6775 			diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_DST;
6776 			goto bail;
6777 		}
6778 		if (sadb_addrcheck(NULL, (mblk_t *)samsg,
6779 		    (sadb_ext_t *)innsrcext, 0, ns) == KS_IN_ADDR_UNKNOWN) {
6780 			err = EINVAL;
6781 			diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_INNER_SRC;
6782 			goto bail;
6783 		}
6784 		isrc = (struct sockaddr_in6 *)(innsrcext + 1);
6785 		if (sadb_addrcheck(NULL, (mblk_t *)samsg,
6786 		    (sadb_ext_t *)inndstext, 0, ns) == KS_IN_ADDR_UNKNOWN) {
6787 			err = EINVAL;
6788 			diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_INNER_DST;
6789 			goto bail;
6790 		}
6791 		idst = (struct sockaddr_in6 *)(inndstext + 1);
6792 		if (isrc->sin6_family != idst->sin6_family) {
6793 			err = EINVAL;
6794 			diagnostic = SADB_X_DIAGNOSTIC_INNER_AF_MISMATCH;
6795 			goto bail;
6796 		}
6797 		if (isrc->sin6_family != AF_INET &&
6798 		    isrc->sin6_family != AF_INET6) {
6799 			err = EINVAL;
6800 			diagnostic = SADB_X_DIAGNOSTIC_BAD_INNER_SRC_AF;
6801 			goto bail;
6802 		}
6803 	} else if (inndstext != NULL) {
6804 		err = EINVAL;
6805 		diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_SRC;
6806 		goto bail;
6807 	}
6808 
6809 	/* Get selectors first, based on outer addresses */
6810 	err = ipsec_get_inverse_acquire_sel(&sel, srcext, dstext, &diagnostic);
6811 	if (err != 0)
6812 		goto bail;
6813 
6814 	/* Check for tunnel mode mismatches. */
6815 	if (innsrcext != NULL &&
6816 	    ((isrc->sin6_family == AF_INET &&
6817 	    sel.ips_protocol != IPPROTO_ENCAP && sel.ips_protocol != 0) ||
6818 	    (isrc->sin6_family == AF_INET6 &&
6819 	    sel.ips_protocol != IPPROTO_IPV6 && sel.ips_protocol != 0))) {
6820 		err = EPROTOTYPE;
6821 		goto bail;
6822 	}
6823 
6824 	/*
6825 	 * Okay, we have the addresses and other selector information.
6826 	 * Let's first find a conn...
6827 	 */
6828 	pp = NULL;
6829 	switch (sel.ips_protocol) {
6830 	case IPPROTO_TCP:
6831 		ipsec_tcp_pol(&sel, &pp, ipst);
6832 		break;
6833 	case IPPROTO_UDP:
6834 		ipsec_udp_pol(&sel, &pp, ipst);
6835 		break;
6836 	case IPPROTO_SCTP:
6837 		ipsec_sctp_pol(&sel, &pp, ipst);
6838 		break;
6839 	case IPPROTO_ENCAP:
6840 	case IPPROTO_IPV6:
6841 		rw_enter(&ipss->ipsec_itp_get_byaddr_rw_lock, RW_READER);
6842 		/*
6843 		 * Assume sel.ips_remote_addr_* has the right address at
6844 		 * that exact position.
6845 		 */
6846 		itp = ipss->ipsec_itp_get_byaddr(
6847 		    (uint32_t *)(&sel.ips_local_addr_v6),
6848 		    (uint32_t *)(&sel.ips_remote_addr_v6),
6849 		    src->sin6_family, ns);
6850 		rw_exit(&ipss->ipsec_itp_get_byaddr_rw_lock);
6851 		if (innsrcext == NULL) {
6852 			/*
6853 			 * Transport-mode tunnel, make sure we fake out isel
6854 			 * to contain something based on the outer protocol.
6855 			 */
6856 			bzero(&isel, sizeof (isel));
6857 			isel.ips_isv4 = (sel.ips_protocol == IPPROTO_ENCAP);
6858 		} /* Else isel is initialized by ipsec_tun_pol(). */
6859 		err = ipsec_tun_pol(&isel, &pp, innsrcext, inndstext, itp,
6860 		    &diagnostic, ns);
6861 		/*
6862 		 * NOTE:  isel isn't used for now, but in RFC 430x IPsec, it
6863 		 * may be.
6864 		 */
6865 		if (err != 0)
6866 			goto bail;
6867 		break;
6868 	default:
6869 		ipsec_oth_pol(&sel, &pp, ipst);
6870 		break;
6871 	}
6872 
6873 	/*
6874 	 * If we didn't find a matching conn_t or other policy head, take a
6875 	 * look in the global policy.
6876 	 */
6877 	if (pp == NULL) {
6878 		pp = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, NULL, NULL, &sel,
6879 		    ns);
6880 		if (pp == NULL) {
6881 			/* There's no global policy. */
6882 			err = ENOENT;
6883 			diagnostic = 0;
6884 			goto bail;
6885 		}
6886 	}
6887 
6888 	/*
6889 	 * Now that we have a policy entry/widget, construct an ACQUIRE
6890 	 * message based on that, fix fields where appropriate,
6891 	 * and return the message.
6892 	 */
6893 	retmp = sadb_extended_acquire(&sel, pp, NULL,
6894 	    (itp != NULL && (itp->itp_flags & ITPF_P_TUNNEL)),
6895 	    samsg->sadb_msg_seq, samsg->sadb_msg_pid, ns);
6896 	if (pp != NULL) {
6897 		IPPOL_REFRELE(pp, ns);
6898 	}
6899 	if (retmp != NULL) {
6900 		return (retmp);
6901 	} else {
6902 		err = ENOMEM;
6903 		diagnostic = 0;
6904 	}
6905 bail:
6906 	samsg->sadb_msg_errno = (uint8_t)err;
6907 	samsg->sadb_x_msg_diagnostic = (uint16_t)diagnostic;
6908 	return (NULL);
6909 }
6910 
6911 /*
6912  * ipsa_lpkt is a one-element queue, only manipulated by the next two
6913  * functions.  They have to hold the ipsa_lock because of potential races
6914  * between key management using SADB_UPDATE, and inbound packets that may
6915  * queue up on the larval SA (hence the 'l' in "lpkt").
6916  */
6917 
6918 /*
6919  * sadb_set_lpkt: Return TRUE if we can swap in a value to ipsa->ipsa_lpkt and
6920  * freemsg the previous value.  Return FALSE if we lost the race and the SA is
6921  * in a non-LARVAL state.  free clue: ip_drop_packet(NULL) is safe.
6922  */
6923 boolean_t
6924 sadb_set_lpkt(ipsa_t *ipsa, mblk_t *npkt, netstack_t *ns)
6925 {
6926 	mblk_t *opkt;
6927 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
6928 	boolean_t is_larval;
6929 
6930 	/*
6931 	 * Check the packet's netstack id in case we go asynch with a
6932 	 * taskq_dispatch.
6933 	 */
6934 	ASSERT(((ipsec_in_t *)npkt->b_rptr)->ipsec_in_type == IPSEC_IN);
6935 	ASSERT(((ipsec_in_t *)npkt->b_rptr)->ipsec_in_stackid ==
6936 	    ns->netstack_stackid);
6937 
6938 	mutex_enter(&ipsa->ipsa_lock);
6939 	is_larval = (ipsa->ipsa_state == IPSA_STATE_LARVAL);
6940 	if (is_larval) {
6941 		opkt = ipsa->ipsa_lpkt;
6942 		ipsa->ipsa_lpkt = npkt;
6943 	} else {
6944 		/* We lost the race. */
6945 		opkt = NULL;
6946 		ASSERT(ipsa->ipsa_lpkt == NULL);
6947 	}
6948 	mutex_exit(&ipsa->ipsa_lock);
6949 
6950 	ip_drop_packet(opkt, B_TRUE, NULL, NULL,
6951 	    DROPPER(ipss, ipds_sadb_inlarval_replace),
6952 	    &ipss->ipsec_sadb_dropper);
6953 	return (is_larval);
6954 }
6955 
6956 /*
6957  * sadb_clear_lpkt: Atomically clear ipsa->ipsa_lpkt and return the
6958  * previous value.
6959  */
6960 mblk_t *
6961 sadb_clear_lpkt(ipsa_t *ipsa)
6962 {
6963 	mblk_t *opkt;
6964 
6965 	mutex_enter(&ipsa->ipsa_lock);
6966 	opkt = ipsa->ipsa_lpkt;
6967 	ipsa->ipsa_lpkt = NULL;
6968 	mutex_exit(&ipsa->ipsa_lock);
6969 
6970 	return (opkt);
6971 }
6972 
6973 /*
6974  * Buffer a packet that's in IDLE state as set by Solaris Clustering.
6975  */
6976 void
6977 sadb_buf_pkt(ipsa_t *ipsa, mblk_t *bpkt, netstack_t *ns)
6978 {
6979 	ipsec_stack_t   *ipss = ns->netstack_ipsec;
6980 	extern void (*cl_inet_idlesa)(netstackid_t, uint8_t, uint32_t,
6981 	    sa_family_t, in6_addr_t, in6_addr_t, void *);
6982 	in6_addr_t *srcaddr = (in6_addr_t *)(&ipsa->ipsa_srcaddr);
6983 	in6_addr_t *dstaddr = (in6_addr_t *)(&ipsa->ipsa_dstaddr);
6984 
6985 	ASSERT(ipsa->ipsa_state == IPSA_STATE_IDLE);
6986 
6987 	if (cl_inet_idlesa == NULL) {
6988 		ip_drop_packet(bpkt, B_TRUE, NULL, NULL,
6989 		    DROPPER(ipss, ipds_sadb_inidle_overflow),
6990 		    &ipss->ipsec_sadb_dropper);
6991 		return;
6992 	}
6993 
6994 	cl_inet_idlesa(ns->netstack_stackid,
6995 	    (ipsa->ipsa_type == SADB_SATYPE_AH) ? IPPROTO_AH : IPPROTO_ESP,
6996 	    ipsa->ipsa_spi, ipsa->ipsa_addrfam, *srcaddr, *dstaddr, NULL);
6997 
6998 	/*
6999 	 * Check the packet's netstack id in case we go asynch with a
7000 	 * taskq_dispatch.
7001 	 */
7002 	ASSERT(((ipsec_in_t *)bpkt->b_rptr)->ipsec_in_type == IPSEC_IN);
7003 	ASSERT(((ipsec_in_t *)bpkt->b_rptr)->ipsec_in_stackid ==
7004 	    ns->netstack_stackid);
7005 
7006 	mutex_enter(&ipsa->ipsa_lock);
7007 	ipsa->ipsa_mblkcnt++;
7008 	if (ipsa->ipsa_bpkt_head == NULL) {
7009 		ipsa->ipsa_bpkt_head = ipsa->ipsa_bpkt_tail = bpkt;
7010 	} else {
7011 		ipsa->ipsa_bpkt_tail->b_next = bpkt;
7012 		ipsa->ipsa_bpkt_tail = bpkt;
7013 		if (ipsa->ipsa_mblkcnt > SADB_MAX_IDLEPKTS) {
7014 			mblk_t *tmp;
7015 			tmp = ipsa->ipsa_bpkt_head;
7016 			ipsa->ipsa_bpkt_head = ipsa->ipsa_bpkt_head->b_next;
7017 			ip_drop_packet(tmp, B_TRUE, NULL, NULL,
7018 			    DROPPER(ipss, ipds_sadb_inidle_overflow),
7019 			    &ipss->ipsec_sadb_dropper);
7020 			ipsa->ipsa_mblkcnt --;
7021 		}
7022 	}
7023 	mutex_exit(&ipsa->ipsa_lock);
7024 
7025 }
7026 
7027 /*
7028  * Stub function that taskq_dispatch() invokes to take the mblk (in arg)
7029  * and put into STREAMS again.
7030  */
7031 void
7032 sadb_clear_buf_pkt(void *ipkt)
7033 {
7034 	mblk_t	*tmp, *buf_pkt;
7035 	netstack_t *ns;
7036 	ipsec_in_t *ii;
7037 
7038 	buf_pkt = (mblk_t *)ipkt;
7039 
7040 	ii = (ipsec_in_t *)buf_pkt->b_rptr;
7041 	ASSERT(ii->ipsec_in_type == IPSEC_IN);
7042 	ns = netstack_find_by_stackid(ii->ipsec_in_stackid);
7043 	if (ns != NULL && ns != ii->ipsec_in_ns) {
7044 		netstack_rele(ns);
7045 		ns = NULL;  /* For while-loop below. */
7046 	}
7047 
7048 	while (buf_pkt != NULL) {
7049 		tmp = buf_pkt->b_next;
7050 		buf_pkt->b_next = NULL;
7051 		if (ns != NULL)
7052 			ip_fanout_proto_again(buf_pkt, NULL, NULL, NULL);
7053 		else
7054 			freemsg(buf_pkt);
7055 		buf_pkt = tmp;
7056 	}
7057 	if (ns != NULL)
7058 		netstack_rele(ns);
7059 }
7060 /*
7061  * Walker callback used by sadb_alg_update() to free/create crypto
7062  * context template when a crypto software provider is removed or
7063  * added.
7064  */
7065 
7066 struct sadb_update_alg_state {
7067 	ipsec_algtype_t alg_type;
7068 	uint8_t alg_id;
7069 	boolean_t is_added;
7070 };
7071 
7072 static void
7073 sadb_alg_update_cb(isaf_t *head, ipsa_t *entry, void *cookie)
7074 {
7075 	struct sadb_update_alg_state *update_state =
7076 	    (struct sadb_update_alg_state *)cookie;
7077 	crypto_ctx_template_t *ctx_tmpl = NULL;
7078 
7079 	ASSERT(MUTEX_HELD(&head->isaf_lock));
7080 
7081 	if (entry->ipsa_state == IPSA_STATE_LARVAL)
7082 		return;
7083 
7084 	mutex_enter(&entry->ipsa_lock);
7085 
7086 	switch (update_state->alg_type) {
7087 	case IPSEC_ALG_AUTH:
7088 		if (entry->ipsa_auth_alg == update_state->alg_id)
7089 			ctx_tmpl = &entry->ipsa_authtmpl;
7090 		break;
7091 	case IPSEC_ALG_ENCR:
7092 		if (entry->ipsa_encr_alg == update_state->alg_id)
7093 			ctx_tmpl = &entry->ipsa_encrtmpl;
7094 		break;
7095 	default:
7096 		ctx_tmpl = NULL;
7097 	}
7098 
7099 	if (ctx_tmpl == NULL) {
7100 		mutex_exit(&entry->ipsa_lock);
7101 		return;
7102 	}
7103 
7104 	/*
7105 	 * The context template of the SA may be affected by the change
7106 	 * of crypto provider.
7107 	 */
7108 	if (update_state->is_added) {
7109 		/* create the context template if not already done */
7110 		if (*ctx_tmpl == NULL) {
7111 			(void) ipsec_create_ctx_tmpl(entry,
7112 			    update_state->alg_type);
7113 		}
7114 	} else {
7115 		/*
7116 		 * The crypto provider was removed. If the context template
7117 		 * exists but it is no longer valid, free it.
7118 		 */
7119 		if (*ctx_tmpl != NULL)
7120 			ipsec_destroy_ctx_tmpl(entry, update_state->alg_type);
7121 	}
7122 
7123 	mutex_exit(&entry->ipsa_lock);
7124 }
7125 
7126 /*
7127  * Invoked by IP when an software crypto provider has been updated.
7128  * The type and id of the corresponding algorithm is passed as argument.
7129  * is_added is B_TRUE if the provider was added, B_FALSE if it was
7130  * removed. The function updates the SADB and free/creates the
7131  * context templates associated with SAs if needed.
7132  */
7133 
7134 #define	SADB_ALG_UPDATE_WALK(sadb, table) \
7135     sadb_walker((sadb).table, (sadb).sdb_hashsize, sadb_alg_update_cb, \
7136 	&update_state)
7137 
7138 void
7139 sadb_alg_update(ipsec_algtype_t alg_type, uint8_t alg_id, boolean_t is_added,
7140     netstack_t *ns)
7141 {
7142 	struct sadb_update_alg_state update_state;
7143 	ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
7144 	ipsecesp_stack_t	*espstack = ns->netstack_ipsecesp;
7145 
7146 	update_state.alg_type = alg_type;
7147 	update_state.alg_id = alg_id;
7148 	update_state.is_added = is_added;
7149 
7150 	if (alg_type == IPSEC_ALG_AUTH) {
7151 		/* walk the AH tables only for auth. algorithm changes */
7152 		SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v4, sdb_of);
7153 		SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v4, sdb_if);
7154 		SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v6, sdb_of);
7155 		SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v6, sdb_if);
7156 	}
7157 
7158 	/* walk the ESP tables */
7159 	SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v4, sdb_of);
7160 	SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v4, sdb_if);
7161 	SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v6, sdb_of);
7162 	SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v6, sdb_if);
7163 }
7164 
7165 /*
7166  * Creates a context template for the specified SA. This function
7167  * is called when an SA is created and when a context template needs
7168  * to be created due to a change of software provider.
7169  */
7170 int
7171 ipsec_create_ctx_tmpl(ipsa_t *sa, ipsec_algtype_t alg_type)
7172 {
7173 	ipsec_alginfo_t *alg;
7174 	crypto_mechanism_t mech;
7175 	crypto_key_t *key;
7176 	crypto_ctx_template_t *sa_tmpl;
7177 	int rv;
7178 	ipsec_stack_t	*ipss = sa->ipsa_netstack->netstack_ipsec;
7179 
7180 	ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock));
7181 	ASSERT(MUTEX_HELD(&sa->ipsa_lock));
7182 
7183 	/* get pointers to the algorithm info, context template, and key */
7184 	switch (alg_type) {
7185 	case IPSEC_ALG_AUTH:
7186 		key = &sa->ipsa_kcfauthkey;
7187 		sa_tmpl = &sa->ipsa_authtmpl;
7188 		alg = ipss->ipsec_alglists[alg_type][sa->ipsa_auth_alg];
7189 		break;
7190 	case IPSEC_ALG_ENCR:
7191 		key = &sa->ipsa_kcfencrkey;
7192 		sa_tmpl = &sa->ipsa_encrtmpl;
7193 		alg = ipss->ipsec_alglists[alg_type][sa->ipsa_encr_alg];
7194 		break;
7195 	default:
7196 		alg = NULL;
7197 	}
7198 
7199 	if (alg == NULL || !ALG_VALID(alg))
7200 		return (EINVAL);
7201 
7202 	/* initialize the mech info structure for the framework */
7203 	ASSERT(alg->alg_mech_type != CRYPTO_MECHANISM_INVALID);
7204 	mech.cm_type = alg->alg_mech_type;
7205 	mech.cm_param = NULL;
7206 	mech.cm_param_len = 0;
7207 
7208 	/* create a new context template */
7209 	rv = crypto_create_ctx_template(&mech, key, sa_tmpl, KM_NOSLEEP);
7210 
7211 	/*
7212 	 * CRYPTO_MECH_NOT_SUPPORTED can be returned if only hardware
7213 	 * providers are available for that mechanism. In that case
7214 	 * we don't fail, and will generate the context template from
7215 	 * the framework callback when a software provider for that
7216 	 * mechanism registers.
7217 	 *
7218 	 * The context template is assigned the special value
7219 	 * IPSEC_CTX_TMPL_ALLOC if the allocation failed due to a
7220 	 * lack of memory. No attempt will be made to use
7221 	 * the context template if it is set to this value.
7222 	 */
7223 	if (rv == CRYPTO_HOST_MEMORY) {
7224 		*sa_tmpl = IPSEC_CTX_TMPL_ALLOC;
7225 	} else if (rv != CRYPTO_SUCCESS) {
7226 		*sa_tmpl = NULL;
7227 		if (rv != CRYPTO_MECH_NOT_SUPPORTED)
7228 			return (EINVAL);
7229 	}
7230 
7231 	return (0);
7232 }
7233 
7234 /*
7235  * Destroy the context template of the specified algorithm type
7236  * of the specified SA. Must be called while holding the SA lock.
7237  */
7238 void
7239 ipsec_destroy_ctx_tmpl(ipsa_t *sa, ipsec_algtype_t alg_type)
7240 {
7241 	ASSERT(MUTEX_HELD(&sa->ipsa_lock));
7242 
7243 	if (alg_type == IPSEC_ALG_AUTH) {
7244 		if (sa->ipsa_authtmpl == IPSEC_CTX_TMPL_ALLOC)
7245 			sa->ipsa_authtmpl = NULL;
7246 		else if (sa->ipsa_authtmpl != NULL) {
7247 			crypto_destroy_ctx_template(sa->ipsa_authtmpl);
7248 			sa->ipsa_authtmpl = NULL;
7249 		}
7250 	} else {
7251 		ASSERT(alg_type == IPSEC_ALG_ENCR);
7252 		if (sa->ipsa_encrtmpl == IPSEC_CTX_TMPL_ALLOC)
7253 			sa->ipsa_encrtmpl = NULL;
7254 		else if (sa->ipsa_encrtmpl != NULL) {
7255 			crypto_destroy_ctx_template(sa->ipsa_encrtmpl);
7256 			sa->ipsa_encrtmpl = NULL;
7257 		}
7258 	}
7259 }
7260 
7261 /*
7262  * Use the kernel crypto framework to check the validity of a key received
7263  * via keysock. Returns 0 if the key is OK, -1 otherwise.
7264  */
7265 int
7266 ipsec_check_key(crypto_mech_type_t mech_type, sadb_key_t *sadb_key,
7267     boolean_t is_auth, int *diag)
7268 {
7269 	crypto_mechanism_t mech;
7270 	crypto_key_t crypto_key;
7271 	int crypto_rc;
7272 
7273 	mech.cm_type = mech_type;
7274 	mech.cm_param = NULL;
7275 	mech.cm_param_len = 0;
7276 
7277 	crypto_key.ck_format = CRYPTO_KEY_RAW;
7278 	crypto_key.ck_data = sadb_key + 1;
7279 	crypto_key.ck_length = sadb_key->sadb_key_bits;
7280 
7281 	crypto_rc = crypto_key_check(&mech, &crypto_key);
7282 
7283 	switch (crypto_rc) {
7284 	case CRYPTO_SUCCESS:
7285 		return (0);
7286 	case CRYPTO_MECHANISM_INVALID:
7287 	case CRYPTO_MECH_NOT_SUPPORTED:
7288 		*diag = is_auth ? SADB_X_DIAGNOSTIC_BAD_AALG :
7289 		    SADB_X_DIAGNOSTIC_BAD_EALG;
7290 		break;
7291 	case CRYPTO_KEY_SIZE_RANGE:
7292 		*diag = is_auth ? SADB_X_DIAGNOSTIC_BAD_AKEYBITS :
7293 		    SADB_X_DIAGNOSTIC_BAD_EKEYBITS;
7294 		break;
7295 	case CRYPTO_WEAK_KEY:
7296 		*diag = is_auth ? SADB_X_DIAGNOSTIC_WEAK_AKEY :
7297 		    SADB_X_DIAGNOSTIC_WEAK_EKEY;
7298 		break;
7299 	}
7300 
7301 	return (-1);
7302 }
7303 /*
7304  * If this is an outgoing SA then add some fuzz to the
7305  * SOFT EXPIRE time. The reason for this is to stop
7306  * peers trying to renegotiate SOFT expiring SA's at
7307  * the same time. The amount of fuzz needs to be at
7308  * least 8 seconds which is the typical interval
7309  * sadb_ager(), although this is only a guide as it
7310  * selftunes.
7311  */
7312 void
7313 lifetime_fuzz(ipsa_t *assoc)
7314 {
7315 	uint8_t rnd;
7316 
7317 	if (assoc->ipsa_softaddlt == 0)
7318 		return;
7319 
7320 	(void) random_get_pseudo_bytes(&rnd, sizeof (rnd));
7321 	rnd = (rnd & 0xF) + 8;
7322 	assoc->ipsa_softexpiretime -= rnd;
7323 	assoc->ipsa_softaddlt -= rnd;
7324 }
7325 void
7326 destroy_ipsa_pair(ipsap_t *ipsapp)
7327 {
7328 	if (ipsapp == NULL)
7329 		return;
7330 
7331 	/*
7332 	 * Because of the multi-line macro nature of IPSA_REFRELE, keep
7333 	 * them in { }.
7334 	 */
7335 	if (ipsapp->ipsap_sa_ptr != NULL) {
7336 		IPSA_REFRELE(ipsapp->ipsap_sa_ptr);
7337 	}
7338 	if (ipsapp->ipsap_psa_ptr != NULL) {
7339 		IPSA_REFRELE(ipsapp->ipsap_psa_ptr);
7340 	}
7341 
7342 	kmem_free(ipsapp, sizeof (*ipsapp));
7343 }
7344 
7345 /*
7346  * The sadb_ager() function walks through the hash tables of SA's and ages
7347  * them, if the SA expires as a result, its marked as DEAD and will be reaped
7348  * the next time sadb_ager() runs. SA's which are paired or have a peer (same
7349  * SA appears in both the inbound and outbound tables because its not possible
7350  * to determine its direction) are placed on a list when they expire. This is
7351  * to ensure that pair/peer SA's are reaped at the same time, even if they
7352  * expire at different times.
7353  *
7354  * This function is called twice by sadb_ager(), one after processing the
7355  * inbound table, then again after processing the outbound table.
7356  */
7357 void
7358 age_pair_peer_list(templist_t *haspeerlist, sadb_t *sp, boolean_t outbound)
7359 {
7360 	templist_t *listptr;
7361 	int outhash;
7362 	isaf_t *bucket;
7363 	boolean_t haspeer;
7364 	ipsa_t *peer_assoc, *dying;
7365 	/*
7366 	 * Haspeer cases will contain both IPv4 and IPv6.  This code
7367 	 * is address independent.
7368 	 */
7369 	while (haspeerlist != NULL) {
7370 		/* "dying" contains the SA that has a peer. */
7371 		dying = haspeerlist->ipsa;
7372 		haspeer = (dying->ipsa_haspeer);
7373 		listptr = haspeerlist;
7374 		haspeerlist = listptr->next;
7375 		kmem_free(listptr, sizeof (*listptr));
7376 		/*
7377 		 * Pick peer bucket based on addrfam.
7378 		 */
7379 		if (outbound) {
7380 			if (haspeer)
7381 				bucket = INBOUND_BUCKET(sp, dying->ipsa_spi);
7382 			else
7383 				bucket = INBOUND_BUCKET(sp,
7384 				    dying->ipsa_otherspi);
7385 		} else { /* inbound */
7386 			if (haspeer) {
7387 				if (dying->ipsa_addrfam == AF_INET6) {
7388 					outhash = OUTBOUND_HASH_V6(sp,
7389 					    *((in6_addr_t *)&dying->
7390 					    ipsa_dstaddr));
7391 				} else {
7392 					outhash = OUTBOUND_HASH_V4(sp,
7393 					    *((ipaddr_t *)&dying->
7394 					    ipsa_dstaddr));
7395 				}
7396 			} else if (dying->ipsa_addrfam == AF_INET6) {
7397 				outhash = OUTBOUND_HASH_V6(sp,
7398 				    *((in6_addr_t *)&dying->
7399 				    ipsa_srcaddr));
7400 			} else {
7401 				outhash = OUTBOUND_HASH_V4(sp,
7402 				    *((ipaddr_t *)&dying->
7403 				    ipsa_srcaddr));
7404 			}
7405 		bucket = &(sp->sdb_of[outhash]);
7406 		}
7407 
7408 		mutex_enter(&bucket->isaf_lock);
7409 		/*
7410 		 * "haspeer" SA's have the same src/dst address ordering,
7411 		 * "paired" SA's have the src/dst addresses reversed.
7412 		 */
7413 		if (haspeer) {
7414 			peer_assoc = ipsec_getassocbyspi(bucket,
7415 			    dying->ipsa_spi, dying->ipsa_srcaddr,
7416 			    dying->ipsa_dstaddr, dying->ipsa_addrfam);
7417 		} else {
7418 			peer_assoc = ipsec_getassocbyspi(bucket,
7419 			    dying->ipsa_otherspi, dying->ipsa_dstaddr,
7420 			    dying->ipsa_srcaddr, dying->ipsa_addrfam);
7421 		}
7422 
7423 		mutex_exit(&bucket->isaf_lock);
7424 		if (peer_assoc != NULL) {
7425 			mutex_enter(&peer_assoc->ipsa_lock);
7426 			mutex_enter(&dying->ipsa_lock);
7427 			if (!haspeer) {
7428 				/*
7429 				 * Only SA's which have a "peer" or are
7430 				 * "paired" end up on this list, so this
7431 				 * must be a "paired" SA, update the flags
7432 				 * to break the pair.
7433 				 */
7434 				peer_assoc->ipsa_otherspi = 0;
7435 				peer_assoc->ipsa_flags &= ~IPSA_F_PAIRED;
7436 				dying->ipsa_otherspi = 0;
7437 				dying->ipsa_flags &= ~IPSA_F_PAIRED;
7438 			}
7439 			if (haspeer || outbound) {
7440 				/*
7441 				 * Update the state of the "inbound" SA when
7442 				 * the "outbound" SA has expired. Don't update
7443 				 * the "outbound" SA when the "inbound" SA
7444 				 * SA expires because setting the hard_addtime
7445 				 * below will cause this to happen.
7446 				 */
7447 				peer_assoc->ipsa_state = dying->ipsa_state;
7448 			}
7449 			if (dying->ipsa_state == IPSA_STATE_DEAD)
7450 				peer_assoc->ipsa_hardexpiretime = 1;
7451 
7452 			mutex_exit(&dying->ipsa_lock);
7453 			mutex_exit(&peer_assoc->ipsa_lock);
7454 			IPSA_REFRELE(peer_assoc);
7455 		}
7456 		IPSA_REFRELE(dying);
7457 	}
7458 }
7459