xref: /titanic_50/usr/src/uts/common/inet/ip/sadb.c (revision e31df31051ab05e561eab5b23bb1c00627a10d64)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/stream.h>
28 #include <sys/stropts.h>
29 #include <sys/strsubr.h>
30 #include <sys/errno.h>
31 #include <sys/ddi.h>
32 #include <sys/debug.h>
33 #include <sys/cmn_err.h>
34 #include <sys/stream.h>
35 #include <sys/strlog.h>
36 #include <sys/kmem.h>
37 #include <sys/sunddi.h>
38 #include <sys/tihdr.h>
39 #include <sys/atomic.h>
40 #include <sys/socket.h>
41 #include <sys/sysmacros.h>
42 #include <sys/crypto/common.h>
43 #include <sys/crypto/api.h>
44 #include <sys/zone.h>
45 #include <netinet/in.h>
46 #include <net/if.h>
47 #include <net/pfkeyv2.h>
48 #include <inet/common.h>
49 #include <netinet/ip6.h>
50 #include <inet/ip.h>
51 #include <inet/ip_ire.h>
52 #include <inet/ip6.h>
53 #include <inet/ipsec_info.h>
54 #include <inet/tcp.h>
55 #include <inet/sadb.h>
56 #include <inet/ipsec_impl.h>
57 #include <inet/ipsecah.h>
58 #include <inet/ipsecesp.h>
59 #include <sys/random.h>
60 #include <sys/dlpi.h>
61 #include <sys/iphada.h>
62 #include <inet/ip_if.h>
63 #include <inet/ipdrop.h>
64 #include <inet/ipclassifier.h>
65 #include <inet/sctp_ip.h>
66 
67 /*
68  * This source file contains Security Association Database (SADB) common
69  * routines.  They are linked in with the AH module.  Since AH has no chance
70  * of falling under export control, it was safe to link it in there.
71  */
72 
73 static mblk_t *sadb_extended_acquire(ipsec_selector_t *, ipsec_policy_t *,
74     ipsec_action_t *, boolean_t, uint32_t, uint32_t, netstack_t *);
75 static void sadb_ill_df(ill_t *, mblk_t *, isaf_t *, int, boolean_t);
76 static ipsa_t *sadb_torch_assoc(isaf_t *, ipsa_t *, boolean_t, mblk_t **);
77 static void sadb_drain_torchq(queue_t *, mblk_t *);
78 static void sadb_destroy_acqlist(iacqf_t **, uint_t, boolean_t,
79 			    netstack_t *);
80 static void sadb_destroy(sadb_t *, netstack_t *);
81 static mblk_t *sadb_sa2msg(ipsa_t *, sadb_msg_t *);
82 
83 static time_t sadb_add_time(time_t, uint64_t);
84 static void lifetime_fuzz(ipsa_t *);
85 static void age_pair_peer_list(templist_t *, sadb_t *, boolean_t);
86 static void ipsa_set_replay(ipsa_t *ipsa, uint32_t offset);
87 
88 extern void (*cl_inet_getspi)(netstackid_t stack_id, uint8_t protocol,
89     uint8_t *ptr, size_t len, void *args);
90 extern int (*cl_inet_checkspi)(netstackid_t stack_id, uint8_t protocol,
91     uint32_t spi, void *args);
92 extern void (*cl_inet_deletespi)(netstackid_t stack_id, uint8_t protocol,
93     uint32_t spi, void *args);
94 
95 /*
96  * ipsacq_maxpackets is defined here to make it tunable
97  * from /etc/system.
98  */
99 extern uint64_t ipsacq_maxpackets;
100 
/*
 * SET_EXPIRE(sa, delta, exp): if the lifetime field ipsa_<delta> of "sa" is
 * nonzero, set ipsa_<exp> to ipsa_addtime + ipsa_<delta>, computed with
 * sadb_add_time().  A zero lifetime leaves ipsa_<exp> untouched.
 */
#define	SET_EXPIRE(sa, delta, exp) {					\
	if ((sa)->ipsa_ ## delta != 0)					\
		(sa)->ipsa_ ## exp =					\
		    sadb_add_time((sa)->ipsa_addtime,			\
		    (sa)->ipsa_ ## delta);				\
}
107 
/*
 * UPDATE_EXPIRE(sa, delta, exp): if the lifetime field ipsa_<delta> of "sa"
 * is nonzero, compute ipsa_usetime + ipsa_<delta> with sadb_add_time() and
 * store it in ipsa_<exp> when ipsa_<exp> is still zero or when the new value
 * is earlier than the one already recorded.
 */
#define	UPDATE_EXPIRE(sa, delta, exp) {					\
	if ((sa)->ipsa_ ## delta != 0) {				\
		time_t use_exp = sadb_add_time((sa)->ipsa_usetime,	\
		    (sa)->ipsa_ ## delta);				\
		if ((sa)->ipsa_ ## exp == 0 ||				\
		    use_exp < (sa)->ipsa_ ## exp)			\
			(sa)->ipsa_ ## exp = use_exp;			\
	}								\
}
119 
120 
121 /* wrap the macro so we can pass it as a function pointer */
122 void
123 sadb_sa_refrele(void *target)
124 {
125 	IPSA_REFRELE(((ipsa_t *)target));
126 }
127 
/*
 * We presume that sizeof (long) == sizeof (time_t) and that time_t is
 * a signed type.
 */
#define	TIME_MAX LONG_MAX

/*
 * PF_KEY gives us lifetimes in uint64_t seconds.  We presume that
 * time_t is defined to be a signed type with the same range as
 * "long".  On ILP32 systems, we thus run the risk of wrapping around
 * at end of time, as well as "overwrapping" the clock back around
 * into a seemingly valid but incorrect future date earlier than the
 * desired expiration.
 *
 * In order to avoid odd behavior (either negative lifetimes or loss
 * of high order bits) when someone asks for bizarrely long SA
 * lifetimes, we do a saturating add for expire times.
 *
 * We presume that ILP32 systems will be past end of support life when
 * the 32-bit time_t overflows (a dangerous assumption, mind you..).
 *
 * On LP64, 2^64 seconds are about 5.8e11 years, at which point we
 * will hopefully have figured out clever ways to avoid the use of
 * fixed-sized integers in computation.
 *
 * Returns base + delta, or TIME_MAX if that sum is not representable
 * as a time_t.
 */
static time_t
sadb_add_time(time_t base, uint64_t delta)
{
	/*
	 * Clip delta to the maximum possible time_t value to
	 * prevent "overwrapping" back into a shorter-than-desired
	 * future time.
	 */
	if (delta > TIME_MAX)
		delta = TIME_MAX;

	/*
	 * Decide whether the sum would exceed TIME_MAX *before* adding,
	 * rather than adding first and inspecting a wrapped result; the
	 * latter depends on how an out-of-range value converts to a signed
	 * type.  With delta clipped, a negative base can never overflow, so
	 * only a non-negative base needs the check (which also keeps
	 * TIME_MAX - base itself from overflowing).
	 */
	if (base >= 0 && delta > (uint64_t)(TIME_MAX - base))
		return (TIME_MAX);

	return (base + (time_t)delta);
}
178 
179 /*
180  * Callers of this function have already created a working security
181  * association, and have found the appropriate table & hash chain.  All this
182  * function does is check duplicates, and insert the SA.  The caller needs to
183  * hold the hash bucket lock and increment the refcnt before insertion.
184  *
185  * Return 0 if success, EEXIST if collision.
186  */
187 #define	SA_UNIQUE_MATCH(sa1, sa2) \
188 	(((sa1)->ipsa_unique_id & (sa1)->ipsa_unique_mask) == \
189 	((sa2)->ipsa_unique_id & (sa2)->ipsa_unique_mask))
190 
191 int
192 sadb_insertassoc(ipsa_t *ipsa, isaf_t *bucket)
193 {
194 	ipsa_t **ptpn = NULL;
195 	ipsa_t *walker;
196 	boolean_t unspecsrc;
197 
198 	ASSERT(MUTEX_HELD(&bucket->isaf_lock));
199 
200 	unspecsrc = IPSA_IS_ADDR_UNSPEC(ipsa->ipsa_srcaddr, ipsa->ipsa_addrfam);
201 
202 	walker = bucket->isaf_ipsa;
203 	ASSERT(walker == NULL || ipsa->ipsa_addrfam == walker->ipsa_addrfam);
204 
205 	/*
206 	 * Find insertion point (pointed to with **ptpn).  Insert at the head
207 	 * of the list unless there's an unspecified source address, then
208 	 * insert it after the last SA with a specified source address.
209 	 *
210 	 * BTW, you'll have to walk the whole chain, matching on {DST, SPI}
211 	 * checking for collisions.
212 	 */
213 
214 	while (walker != NULL) {
215 		if (IPSA_ARE_ADDR_EQUAL(walker->ipsa_dstaddr,
216 		    ipsa->ipsa_dstaddr, ipsa->ipsa_addrfam)) {
217 			if (walker->ipsa_spi == ipsa->ipsa_spi)
218 				return (EEXIST);
219 
220 			mutex_enter(&walker->ipsa_lock);
221 			if (ipsa->ipsa_state == IPSA_STATE_MATURE &&
222 			    (walker->ipsa_flags & IPSA_F_USED) &&
223 			    SA_UNIQUE_MATCH(walker, ipsa)) {
224 				walker->ipsa_flags |= IPSA_F_CINVALID;
225 			}
226 			mutex_exit(&walker->ipsa_lock);
227 		}
228 
229 		if (ptpn == NULL && unspecsrc) {
230 			if (IPSA_IS_ADDR_UNSPEC(walker->ipsa_srcaddr,
231 			    walker->ipsa_addrfam))
232 				ptpn = walker->ipsa_ptpn;
233 			else if (walker->ipsa_next == NULL)
234 				ptpn = &walker->ipsa_next;
235 		}
236 
237 		walker = walker->ipsa_next;
238 	}
239 
240 	if (ptpn == NULL)
241 		ptpn = &bucket->isaf_ipsa;
242 	ipsa->ipsa_next = *ptpn;
243 	ipsa->ipsa_ptpn = ptpn;
244 	if (ipsa->ipsa_next != NULL)
245 		ipsa->ipsa_next->ipsa_ptpn = &ipsa->ipsa_next;
246 	*ptpn = ipsa;
247 	ipsa->ipsa_linklock = &bucket->isaf_lock;
248 
249 	return (0);
250 }
251 #undef SA_UNIQUE_MATCH
252 
253 /*
254  * Free a security association.  Its reference count is 0, which means
255  * I must free it.  The SA must be unlocked and must not be linked into
256  * any fanout list.
257  */
258 static void
259 sadb_freeassoc(ipsa_t *ipsa)
260 {
261 	ipsec_stack_t	*ipss = ipsa->ipsa_netstack->netstack_ipsec;
262 
263 	ASSERT(ipss != NULL);
264 	ASSERT(MUTEX_NOT_HELD(&ipsa->ipsa_lock));
265 	ASSERT(ipsa->ipsa_refcnt == 0);
266 	ASSERT(ipsa->ipsa_next == NULL);
267 	ASSERT(ipsa->ipsa_ptpn == NULL);
268 
269 	mutex_enter(&ipsa->ipsa_lock);
270 	/* Don't call sadb_clear_lpkt() since we hold the ipsa_lock anyway. */
271 	ip_drop_packet(ipsa->ipsa_lpkt, B_TRUE, NULL, NULL,
272 	    DROPPER(ipss, ipds_sadb_inlarval_timeout),
273 	    &ipss->ipsec_sadb_dropper);
274 	ipsec_destroy_ctx_tmpl(ipsa, IPSEC_ALG_AUTH);
275 	ipsec_destroy_ctx_tmpl(ipsa, IPSEC_ALG_ENCR);
276 	mutex_exit(&ipsa->ipsa_lock);
277 
278 	/* bzero() these fields for paranoia's sake. */
279 	if (ipsa->ipsa_authkey != NULL) {
280 		bzero(ipsa->ipsa_authkey, ipsa->ipsa_authkeylen);
281 		kmem_free(ipsa->ipsa_authkey, ipsa->ipsa_authkeylen);
282 	}
283 	if (ipsa->ipsa_encrkey != NULL) {
284 		bzero(ipsa->ipsa_encrkey, ipsa->ipsa_encrkeylen);
285 		kmem_free(ipsa->ipsa_encrkey, ipsa->ipsa_encrkeylen);
286 	}
287 	if (ipsa->ipsa_src_cid != NULL) {
288 		IPSID_REFRELE(ipsa->ipsa_src_cid);
289 	}
290 	if (ipsa->ipsa_dst_cid != NULL) {
291 		IPSID_REFRELE(ipsa->ipsa_dst_cid);
292 	}
293 	if (ipsa->ipsa_integ != NULL)
294 		kmem_free(ipsa->ipsa_integ, ipsa->ipsa_integlen);
295 	if (ipsa->ipsa_sens != NULL)
296 		kmem_free(ipsa->ipsa_sens, ipsa->ipsa_senslen);
297 
298 	mutex_destroy(&ipsa->ipsa_lock);
299 	kmem_free(ipsa, sizeof (*ipsa));
300 }
301 
302 /*
303  * Unlink a security association from a hash bucket.  Assume the hash bucket
304  * lock is held, but the association's lock is not.
305  *
306  * Note that we do not bump the bucket's generation number here because
307  * we might not be making a visible change to the set of visible SA's.
308  * All callers MUST bump the bucket's generation number before they unlock
309  * the bucket if they use sadb_unlinkassoc to permanetly remove an SA which
310  * was present in the bucket at the time it was locked.
311  */
312 void
313 sadb_unlinkassoc(ipsa_t *ipsa)
314 {
315 	ASSERT(ipsa->ipsa_linklock != NULL);
316 	ASSERT(MUTEX_HELD(ipsa->ipsa_linklock));
317 
318 	/* These fields are protected by the link lock. */
319 	*(ipsa->ipsa_ptpn) = ipsa->ipsa_next;
320 	if (ipsa->ipsa_next != NULL) {
321 		ipsa->ipsa_next->ipsa_ptpn = ipsa->ipsa_ptpn;
322 		ipsa->ipsa_next = NULL;
323 	}
324 
325 	ipsa->ipsa_ptpn = NULL;
326 
327 	/* This may destroy the SA. */
328 	IPSA_REFRELE(ipsa);
329 }
330 
331 void
332 sadb_delete_cluster(ipsa_t *assoc)
333 {
334 	uint8_t protocol;
335 
336 	if (cl_inet_deletespi &&
337 	    ((assoc->ipsa_state == IPSA_STATE_LARVAL) ||
338 	    (assoc->ipsa_state == IPSA_STATE_MATURE))) {
339 		protocol = (assoc->ipsa_type == SADB_SATYPE_AH) ?
340 		    IPPROTO_AH : IPPROTO_ESP;
341 		cl_inet_deletespi(assoc->ipsa_netstack->netstack_stackid,
342 		    protocol, assoc->ipsa_spi, NULL);
343 	}
344 }
345 
346 /*
347  * Create a larval security association with the specified SPI.	 All other
348  * fields are zeroed.
349  */
350 static ipsa_t *
351 sadb_makelarvalassoc(uint32_t spi, uint32_t *src, uint32_t *dst, int addrfam,
352     netstack_t *ns)
353 {
354 	ipsa_t *newbie;
355 
356 	/*
357 	 * Allocate...
358 	 */
359 
360 	newbie = (ipsa_t *)kmem_zalloc(sizeof (ipsa_t), KM_NOSLEEP);
361 	if (newbie == NULL) {
362 		/* Can't make new larval SA. */
363 		return (NULL);
364 	}
365 
366 	/* Assigned requested SPI, assume caller does SPI allocation magic. */
367 	newbie->ipsa_spi = spi;
368 	newbie->ipsa_netstack = ns;	/* No netstack_hold */
369 
370 	/*
371 	 * Copy addresses...
372 	 */
373 
374 	IPSA_COPY_ADDR(newbie->ipsa_srcaddr, src, addrfam);
375 	IPSA_COPY_ADDR(newbie->ipsa_dstaddr, dst, addrfam);
376 
377 	newbie->ipsa_addrfam = addrfam;
378 
379 	/*
380 	 * Set common initialization values, including refcnt.
381 	 */
382 	mutex_init(&newbie->ipsa_lock, NULL, MUTEX_DEFAULT, NULL);
383 	newbie->ipsa_state = IPSA_STATE_LARVAL;
384 	newbie->ipsa_refcnt = 1;
385 	newbie->ipsa_freefunc = sadb_freeassoc;
386 
387 	/*
388 	 * There aren't a lot of other common initialization values, as
389 	 * they are copied in from the PF_KEY message.
390 	 */
391 
392 	return (newbie);
393 }
394 
395 /*
396  * Call me to initialize a security association fanout.
397  */
398 static int
399 sadb_init_fanout(isaf_t **tablep, uint_t size, int kmflag)
400 {
401 	isaf_t *table;
402 	int i;
403 
404 	table = (isaf_t *)kmem_alloc(size * sizeof (*table), kmflag);
405 	*tablep = table;
406 
407 	if (table == NULL)
408 		return (ENOMEM);
409 
410 	for (i = 0; i < size; i++) {
411 		mutex_init(&(table[i].isaf_lock), NULL, MUTEX_DEFAULT, NULL);
412 		table[i].isaf_ipsa = NULL;
413 		table[i].isaf_gen = 0;
414 	}
415 
416 	return (0);
417 }
418 
419 /*
420  * Call me to initialize an acquire fanout
421  */
422 static int
423 sadb_init_acfanout(iacqf_t **tablep, uint_t size, int kmflag)
424 {
425 	iacqf_t *table;
426 	int i;
427 
428 	table = (iacqf_t *)kmem_alloc(size * sizeof (*table), kmflag);
429 	*tablep = table;
430 
431 	if (table == NULL)
432 		return (ENOMEM);
433 
434 	for (i = 0; i < size; i++) {
435 		mutex_init(&(table[i].iacqf_lock), NULL, MUTEX_DEFAULT, NULL);
436 		table[i].iacqf_ipsacq = NULL;
437 	}
438 
439 	return (0);
440 }
441 
442 /*
443  * Attempt to initialize an SADB instance.  On failure, return ENOMEM;
444  * caller must clean up partial allocations.
445  */
446 static int
447 sadb_init_trial(sadb_t *sp, uint_t size, int kmflag)
448 {
449 	ASSERT(sp->sdb_of == NULL);
450 	ASSERT(sp->sdb_if == NULL);
451 	ASSERT(sp->sdb_acq == NULL);
452 
453 	sp->sdb_hashsize = size;
454 	if (sadb_init_fanout(&sp->sdb_of, size, kmflag) != 0)
455 		return (ENOMEM);
456 	if (sadb_init_fanout(&sp->sdb_if, size, kmflag) != 0)
457 		return (ENOMEM);
458 	if (sadb_init_acfanout(&sp->sdb_acq, size, kmflag) != 0)
459 		return (ENOMEM);
460 
461 	return (0);
462 }
463 
464 /*
465  * Call me to initialize an SADB instance; fall back to default size on failure.
466  */
467 static void
468 sadb_init(const char *name, sadb_t *sp, uint_t size, uint_t ver,
469     netstack_t *ns)
470 {
471 	ASSERT(sp->sdb_of == NULL);
472 	ASSERT(sp->sdb_if == NULL);
473 	ASSERT(sp->sdb_acq == NULL);
474 
475 	if (size < IPSEC_DEFAULT_HASH_SIZE)
476 		size = IPSEC_DEFAULT_HASH_SIZE;
477 
478 	if (sadb_init_trial(sp, size, KM_NOSLEEP) != 0) {
479 
480 		cmn_err(CE_WARN,
481 		    "Unable to allocate %u entry IPv%u %s SADB hash table",
482 		    size, ver, name);
483 
484 		sadb_destroy(sp, ns);
485 		size = IPSEC_DEFAULT_HASH_SIZE;
486 		cmn_err(CE_WARN, "Falling back to %d entries", size);
487 		(void) sadb_init_trial(sp, size, KM_SLEEP);
488 	}
489 }
490 
491 
492 /*
493  * Initialize an SADB-pair.
494  */
495 void
496 sadbp_init(const char *name, sadbp_t *sp, int type, int size, netstack_t *ns)
497 {
498 	sadb_init(name, &sp->s_v4, size, 4, ns);
499 	sadb_init(name, &sp->s_v6, size, 6, ns);
500 
501 	sp->s_satype = type;
502 
503 	ASSERT((type == SADB_SATYPE_AH) || (type == SADB_SATYPE_ESP));
504 	if (type == SADB_SATYPE_AH) {
505 		ipsec_stack_t	*ipss = ns->netstack_ipsec;
506 
507 		ip_drop_register(&ipss->ipsec_sadb_dropper, "IPsec SADB");
508 		sp->s_addflags = AH_ADD_SETTABLE_FLAGS;
509 		sp->s_updateflags = AH_UPDATE_SETTABLE_FLAGS;
510 	} else {
511 		sp->s_addflags = ESP_ADD_SETTABLE_FLAGS;
512 		sp->s_updateflags = ESP_UPDATE_SETTABLE_FLAGS;
513 	}
514 }
515 
516 /*
517  * Deliver a single SADB_DUMP message representing a single SA.  This is
518  * called many times by sadb_dump().
519  *
520  * If the return value of this is ENOBUFS (not the same as ENOMEM), then
521  * the caller should take that as a hint that dupb() on the "original answer"
522  * failed, and that perhaps the caller should try again with a copyb()ed
523  * "original answer".
524  */
525 static int
526 sadb_dump_deliver(queue_t *pfkey_q, mblk_t *original_answer, ipsa_t *ipsa,
527     sadb_msg_t *samsg)
528 {
529 	mblk_t *answer;
530 
531 	answer = dupb(original_answer);
532 	if (answer == NULL)
533 		return (ENOBUFS);
534 	answer->b_cont = sadb_sa2msg(ipsa, samsg);
535 	if (answer->b_cont == NULL) {
536 		freeb(answer);
537 		return (ENOMEM);
538 	}
539 
540 	/* Just do a putnext, and let keysock deal with flow control. */
541 	putnext(pfkey_q, answer);
542 	return (0);
543 }
544 
545 /*
546  * Common function to allocate and prepare a keysock_out_t M_CTL message.
547  */
548 mblk_t *
549 sadb_keysock_out(minor_t serial)
550 {
551 	mblk_t *mp;
552 	keysock_out_t *kso;
553 
554 	mp = allocb(sizeof (ipsec_info_t), BPRI_HI);
555 	if (mp != NULL) {
556 		mp->b_datap->db_type = M_CTL;
557 		mp->b_wptr += sizeof (ipsec_info_t);
558 		kso = (keysock_out_t *)mp->b_rptr;
559 		kso->ks_out_type = KEYSOCK_OUT;
560 		kso->ks_out_len = sizeof (*kso);
561 		kso->ks_out_serial = serial;
562 	}
563 
564 	return (mp);
565 }
566 
567 /*
568  * Perform an SADB_DUMP, spewing out every SA in an array of SA fanouts
569  * to keysock.
570  */
571 static int
572 sadb_dump_fanout(queue_t *pfkey_q, mblk_t *mp, minor_t serial, isaf_t *fanout,
573     int num_entries, boolean_t do_peers, time_t active_time)
574 {
575 	int i, error = 0;
576 	mblk_t *original_answer;
577 	ipsa_t *walker;
578 	sadb_msg_t *samsg;
579 	time_t	current;
580 
581 	/*
582 	 * For each IPSA hash bucket do:
583 	 *	- Hold the mutex
584 	 *	- Walk each entry, doing an sadb_dump_deliver() on it.
585 	 */
586 	ASSERT(mp->b_cont != NULL);
587 	samsg = (sadb_msg_t *)mp->b_cont->b_rptr;
588 
589 	original_answer = sadb_keysock_out(serial);
590 	if (original_answer == NULL)
591 		return (ENOMEM);
592 
593 	current = gethrestime_sec();
594 	for (i = 0; i < num_entries; i++) {
595 		mutex_enter(&fanout[i].isaf_lock);
596 		for (walker = fanout[i].isaf_ipsa; walker != NULL;
597 		    walker = walker->ipsa_next) {
598 			if (!do_peers && walker->ipsa_haspeer)
599 				continue;
600 			if ((active_time != 0) &&
601 			    ((current - walker->ipsa_lastuse) > active_time))
602 				continue;
603 			error = sadb_dump_deliver(pfkey_q, original_answer,
604 			    walker, samsg);
605 			if (error == ENOBUFS) {
606 				mblk_t *new_original_answer;
607 
608 				/* Ran out of dupb's.  Try a copyb. */
609 				new_original_answer = copyb(original_answer);
610 				if (new_original_answer == NULL) {
611 					error = ENOMEM;
612 				} else {
613 					freeb(original_answer);
614 					original_answer = new_original_answer;
615 					error = sadb_dump_deliver(pfkey_q,
616 					    original_answer, walker, samsg);
617 				}
618 			}
619 			if (error != 0)
620 				break;	/* out of for loop. */
621 		}
622 		mutex_exit(&fanout[i].isaf_lock);
623 		if (error != 0)
624 			break;	/* out of for loop. */
625 	}
626 
627 	freeb(original_answer);
628 	return (error);
629 }
630 
631 /*
632  * Dump an entire SADB; outbound first, then inbound.
633  */
634 
635 int
636 sadb_dump(queue_t *pfkey_q, mblk_t *mp, keysock_in_t *ksi, sadb_t *sp)
637 {
638 	int error;
639 	time_t	active_time = 0;
640 	sadb_x_edump_t	*edump =
641 	    (sadb_x_edump_t *)ksi->ks_in_extv[SADB_X_EXT_EDUMP];
642 
643 	if (edump != NULL) {
644 		active_time = edump->sadb_x_edump_timeout;
645 	}
646 
647 	/* Dump outbound */
648 	error = sadb_dump_fanout(pfkey_q, mp, ksi->ks_in_serial, sp->sdb_of,
649 	    sp->sdb_hashsize, B_TRUE, active_time);
650 	if (error)
651 		return (error);
652 
653 	/* Dump inbound */
654 	return sadb_dump_fanout(pfkey_q, mp, ksi->ks_in_serial, sp->sdb_if,
655 	    sp->sdb_hashsize, B_FALSE, active_time);
656 }
657 
658 /*
659  * Generic sadb table walker.
660  *
661  * Call "walkfn" for each SA in each bucket in "table"; pass the
662  * bucket, the entry and "cookie" to the callback function.
663  * Take care to ensure that walkfn can delete the SA without screwing
664  * up our traverse.
665  *
666  * The bucket is locked for the duration of the callback, both so that the
667  * callback can just call sadb_unlinkassoc() when it wants to delete something,
668  * and so that no new entries are added while we're walking the list.
669  */
670 static void
671 sadb_walker(isaf_t *table, uint_t numentries,
672     void (*walkfn)(isaf_t *head, ipsa_t *entry, void *cookie),
673     void *cookie)
674 {
675 	int i;
676 	for (i = 0; i < numentries; i++) {
677 		ipsa_t *entry, *next;
678 
679 		mutex_enter(&table[i].isaf_lock);
680 
681 		for (entry = table[i].isaf_ipsa; entry != NULL;
682 		    entry = next) {
683 			next = entry->ipsa_next;
684 			(*walkfn)(&table[i], entry, cookie);
685 		}
686 		mutex_exit(&table[i].isaf_lock);
687 	}
688 }
689 
690 /*
691  * From the given SA, construct a dl_ct_ipsec_key and
692  * a dl_ct_ipsec structures to be sent to the adapter as part
693  * of a DL_CONTROL_REQ.
694  *
695  * ct_sa must point to the storage allocated for the key
696  * structure and must be followed by storage allocated
697  * for the SA information that must be sent to the driver
698  * as part of the DL_CONTROL_REQ request.
699  *
700  * The is_inbound boolean indicates whether the specified
701  * SA is part of an inbound SA table.
702  *
703  * Returns B_TRUE if the corresponding SA must be passed to
704  * a provider, B_FALSE otherwise; frees *mp if it returns B_FALSE.
705  */
706 static boolean_t
707 sadb_req_from_sa(ipsa_t *sa, mblk_t *mp, boolean_t is_inbound)
708 {
709 	dl_ct_ipsec_key_t *keyp;
710 	dl_ct_ipsec_t *sap;
711 	void *ct_sa = mp->b_wptr;
712 
713 	ASSERT(MUTEX_HELD(&sa->ipsa_lock));
714 
715 	keyp = (dl_ct_ipsec_key_t *)(ct_sa);
716 	sap = (dl_ct_ipsec_t *)(keyp + 1);
717 
718 	IPSECHW_DEBUG(IPSECHW_CAPAB, ("sadb_req_from_sa: "
719 	    "is_inbound = %d\n", is_inbound));
720 
721 	/* initialize flag */
722 	sap->sadb_sa_flags = 0;
723 	if (is_inbound) {
724 		sap->sadb_sa_flags |= DL_CT_IPSEC_INBOUND;
725 		/*
726 		 * If an inbound SA has a peer, then mark it has being
727 		 * an outbound SA as well.
728 		 */
729 		if (sa->ipsa_haspeer)
730 			sap->sadb_sa_flags |= DL_CT_IPSEC_OUTBOUND;
731 	} else {
732 		/*
733 		 * If an outbound SA has a peer, then don't send it,
734 		 * since we will send the copy from the inbound table.
735 		 */
736 		if (sa->ipsa_haspeer) {
737 			freemsg(mp);
738 			return (B_FALSE);
739 		}
740 		sap->sadb_sa_flags |= DL_CT_IPSEC_OUTBOUND;
741 	}
742 
743 	keyp->dl_key_spi = sa->ipsa_spi;
744 	bcopy(sa->ipsa_dstaddr, keyp->dl_key_dest_addr,
745 	    DL_CTL_IPSEC_ADDR_LEN);
746 	keyp->dl_key_addr_family = sa->ipsa_addrfam;
747 
748 	sap->sadb_sa_auth = sa->ipsa_auth_alg;
749 	sap->sadb_sa_encrypt = sa->ipsa_encr_alg;
750 
751 	sap->sadb_key_len_a = sa->ipsa_authkeylen;
752 	sap->sadb_key_bits_a = sa->ipsa_authkeybits;
753 	bcopy(sa->ipsa_authkey,
754 	    sap->sadb_key_data_a, sap->sadb_key_len_a);
755 
756 	sap->sadb_key_len_e = sa->ipsa_encrkeylen;
757 	sap->sadb_key_bits_e = sa->ipsa_encrkeybits;
758 	bcopy(sa->ipsa_encrkey,
759 	    sap->sadb_key_data_e, sap->sadb_key_len_e);
760 
761 	mp->b_wptr += sizeof (dl_ct_ipsec_t) + sizeof (dl_ct_ipsec_key_t);
762 	return (B_TRUE);
763 }
764 
765 /*
766  * Called from AH or ESP to format a message which will be used to inform
767  * IPsec-acceleration-capable ills of a SADB change.
768  * (It is not possible to send the message to IP directly from this function
769  * since the SA, if any, is locked during the call).
770  *
771  * dl_operation: DL_CONTROL_REQ operation (add, delete, update, etc)
772  * sa_type: identifies whether the operation applies to AH or ESP
773  *	(must be one of SADB_SATYPE_AH or SADB_SATYPE_ESP)
774  * sa: Pointer to an SA.  Must be non-NULL and locked
775  *	for ADD, DELETE, GET, and UPDATE operations.
776  * This function returns an mblk chain that must be passed to IP
777  * for forwarding to the IPsec capable providers.
778  */
779 mblk_t *
780 sadb_fmt_sa_req(uint_t dl_operation, uint_t sa_type, ipsa_t *sa,
781     boolean_t is_inbound)
782 {
783 	mblk_t *mp;
784 	dl_control_req_t *ctrl;
785 	boolean_t need_key = B_FALSE;
786 	mblk_t *ctl_mp = NULL;
787 	ipsec_ctl_t *ctl;
788 
789 	/*
790 	 * 1 allocate and initialize DL_CONTROL_REQ M_PROTO
791 	 * 2 if a key is needed for the operation
792 	 *    2.1 initialize key
793 	 *    2.2 if a full SA is needed for the operation
794 	 *	2.2.1 initialize full SA info
795 	 * 3 return message; caller will call ill_ipsec_capab_send_all()
796 	 * to send the resulting message to IPsec capable ills.
797 	 */
798 
799 	ASSERT(sa_type == SADB_SATYPE_AH || sa_type == SADB_SATYPE_ESP);
800 
801 	/*
802 	 * Allocate DL_CONTROL_REQ M_PROTO
803 	 * We allocate room for the SA even if it's not needed
804 	 * by some of the operations (for example flush)
805 	 */
806 	mp = allocb(sizeof (dl_control_req_t) +
807 	    sizeof (dl_ct_ipsec_key_t) + sizeof (dl_ct_ipsec_t), BPRI_HI);
808 	if (mp == NULL)
809 		return (NULL);
810 	mp->b_datap->db_type = M_PROTO;
811 
812 	/* initialize dl_control_req_t */
813 	ctrl = (dl_control_req_t *)mp->b_wptr;
814 	ctrl->dl_primitive = DL_CONTROL_REQ;
815 	ctrl->dl_operation = dl_operation;
816 	ctrl->dl_type = sa_type == SADB_SATYPE_AH ? DL_CT_IPSEC_AH :
817 	    DL_CT_IPSEC_ESP;
818 	ctrl->dl_key_offset = sizeof (dl_control_req_t);
819 	ctrl->dl_key_length = sizeof (dl_ct_ipsec_key_t);
820 	ctrl->dl_data_offset = sizeof (dl_control_req_t) +
821 	    sizeof (dl_ct_ipsec_key_t);
822 	ctrl->dl_data_length = sizeof (dl_ct_ipsec_t);
823 	mp->b_wptr += sizeof (dl_control_req_t);
824 
825 	if ((dl_operation == DL_CO_SET) || (dl_operation == DL_CO_DELETE)) {
826 		ASSERT(sa != NULL);
827 		ASSERT(MUTEX_HELD(&sa->ipsa_lock));
828 
829 		need_key = B_TRUE;
830 
831 		/*
832 		 * Initialize key and SA data. Note that for some
833 		 * operations the SA data is ignored by the provider
834 		 * (delete, etc.)
835 		 */
836 		if (!sadb_req_from_sa(sa, mp, is_inbound))
837 			return (NULL);
838 	}
839 
840 	/* construct control message */
841 	ctl_mp = allocb(sizeof (ipsec_ctl_t), BPRI_HI);
842 	if (ctl_mp == NULL) {
843 		cmn_err(CE_WARN, "sadb_fmt_sa_req: allocb failed\n");
844 		freemsg(mp);
845 		return (NULL);
846 	}
847 
848 	ctl_mp->b_datap->db_type = M_CTL;
849 	ctl_mp->b_wptr += sizeof (ipsec_ctl_t);
850 	ctl_mp->b_cont = mp;
851 
852 	ctl = (ipsec_ctl_t *)ctl_mp->b_rptr;
853 	ctl->ipsec_ctl_type = IPSEC_CTL;
854 	ctl->ipsec_ctl_len  = sizeof (ipsec_ctl_t);
855 	ctl->ipsec_ctl_sa_type = sa_type;
856 
857 	if (need_key) {
858 		/*
859 		 * Keep an additional reference on SA, since it will be
860 		 * needed by IP to send control messages corresponding
861 		 * to that SA from its perimeter. IP will do a
862 		 * IPSA_REFRELE when done with the request.
863 		 */
864 		ASSERT(MUTEX_HELD(&sa->ipsa_lock));
865 		IPSA_REFHOLD(sa);
866 		ctl->ipsec_ctl_sa = sa;
867 	} else
868 		ctl->ipsec_ctl_sa = NULL;
869 
870 	return (ctl_mp);
871 }
872 
873 
874 /*
875  * Called by sadb_ill_download() to dump the entries for a specific
876  * fanout table.  For each SA entry in the table passed as argument,
877  * use mp as a template and constructs a full DL_CONTROL message, and
878  * call ill_dlpi_send(), provided by IP, to send the resulting
879  * messages to the ill.
880  */
881 static void
882 sadb_ill_df(ill_t *ill, mblk_t *mp, isaf_t *fanout, int num_entries,
883     boolean_t is_inbound)
884 {
885 	ipsa_t *walker;
886 	mblk_t *nmp, *salist;
887 	int i, error = 0;
888 	ip_stack_t	*ipst = ill->ill_ipst;
889 	netstack_t	*ns = ipst->ips_netstack;
890 
891 	IPSECHW_DEBUG(IPSECHW_SADB, ("sadb_ill_df: fanout at 0x%p ne=%d\n",
892 	    (void *)fanout, num_entries));
893 	/*
894 	 * For each IPSA hash bucket do:
895 	 *	- Hold the mutex
896 	 *	- Walk each entry, sending a corresponding request to IP
897 	 *	  for it.
898 	 */
899 	ASSERT(mp->b_datap->db_type == M_PROTO);
900 
901 	for (i = 0; i < num_entries; i++) {
902 		mutex_enter(&fanout[i].isaf_lock);
903 		salist = NULL;
904 
905 		for (walker = fanout[i].isaf_ipsa; walker != NULL;
906 		    walker = walker->ipsa_next) {
907 			IPSECHW_DEBUG(IPSECHW_SADB,
908 			    ("sadb_ill_df: sending SA to ill via IP \n"));
909 			/*
910 			 * Duplicate the template mp passed and
911 			 * complete DL_CONTROL_REQ data.
912 			 * To be more memory efficient, we could use
913 			 * dupb() for the M_CTL and copyb() for the M_PROTO
914 			 * as the M_CTL, since the M_CTL is the same for
915 			 * every SA entry passed down to IP for the same ill.
916 			 *
917 			 * Note that copymsg/copyb ensure that the new mblk
918 			 * is at least as large as the source mblk even if it's
919 			 * not using all its storage -- therefore, nmp
920 			 * has trailing space for sadb_req_from_sa to add
921 			 * the SA-specific bits.
922 			 */
923 			mutex_enter(&walker->ipsa_lock);
924 			if (ipsec_capab_match(ill,
925 			    ill->ill_phyint->phyint_ifindex, ill->ill_isv6,
926 			    walker, ns)) {
927 				nmp = copymsg(mp);
928 				if (nmp == NULL) {
929 					IPSECHW_DEBUG(IPSECHW_SADB,
930 					    ("sadb_ill_df: alloc error\n"));
931 					error = ENOMEM;
932 					mutex_exit(&walker->ipsa_lock);
933 					break;
934 				}
935 				if (sadb_req_from_sa(walker, nmp, is_inbound)) {
936 					nmp->b_next = salist;
937 					salist = nmp;
938 				}
939 			}
940 			mutex_exit(&walker->ipsa_lock);
941 		}
942 		mutex_exit(&fanout[i].isaf_lock);
943 		while (salist != NULL) {
944 			nmp = salist;
945 			salist = nmp->b_next;
946 			nmp->b_next = NULL;
947 			ill_dlpi_send(ill, nmp);
948 		}
949 		if (error != 0)
950 			break;	/* out of for loop. */
951 	}
952 }
953 
954 /*
955  * Called by ill_ipsec_capab_add(). Sends a copy of the SADB of
956  * the type specified by sa_type to the specified ill.
957  *
958  * We call for each fanout table defined by the SADB (one per
959  * protocol). sadb_ill_df() finally calls ill_dlpi_send() for
960  * each SADB entry in order to send a corresponding DL_CONTROL_REQ
961  * message to the ill.
962  */
963 void
964 sadb_ill_download(ill_t *ill, uint_t sa_type)
965 {
966 	mblk_t *protomp;	/* prototype message */
967 	dl_control_req_t *ctrl;
968 	sadbp_t *spp;
969 	sadb_t *sp;
970 	int dlt;
971 	ip_stack_t	*ipst = ill->ill_ipst;
972 	netstack_t	*ns = ipst->ips_netstack;
973 
974 	ASSERT(sa_type == SADB_SATYPE_AH || sa_type == SADB_SATYPE_ESP);
975 
976 	/*
977 	 * Allocate and initialize prototype answer. A duplicate for
978 	 * each SA is sent down to the interface.
979 	 */
980 
981 	/* DL_CONTROL_REQ M_PROTO mblk_t */
982 	protomp = allocb(sizeof (dl_control_req_t) +
983 	    sizeof (dl_ct_ipsec_key_t) + sizeof (dl_ct_ipsec_t), BPRI_HI);
984 	if (protomp == NULL)
985 		return;
986 	protomp->b_datap->db_type = M_PROTO;
987 
988 	dlt = (sa_type == SADB_SATYPE_AH) ? DL_CT_IPSEC_AH : DL_CT_IPSEC_ESP;
989 	if (sa_type == SADB_SATYPE_ESP) {
990 		ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
991 
992 		spp = &espstack->esp_sadb;
993 	} else {
994 		ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
995 
996 		spp = &ahstack->ah_sadb;
997 	}
998 
999 	ctrl = (dl_control_req_t *)protomp->b_wptr;
1000 	ctrl->dl_primitive = DL_CONTROL_REQ;
1001 	ctrl->dl_operation = DL_CO_SET;
1002 	ctrl->dl_type = dlt;
1003 	ctrl->dl_key_offset = sizeof (dl_control_req_t);
1004 	ctrl->dl_key_length = sizeof (dl_ct_ipsec_key_t);
1005 	ctrl->dl_data_offset = sizeof (dl_control_req_t) +
1006 	    sizeof (dl_ct_ipsec_key_t);
1007 	ctrl->dl_data_length = sizeof (dl_ct_ipsec_t);
1008 	protomp->b_wptr += sizeof (dl_control_req_t);
1009 
1010 	/*
1011 	 * then for each SADB entry, we fill out the dl_ct_ipsec_key_t
1012 	 * and dl_ct_ipsec_t
1013 	 */
1014 	sp = ill->ill_isv6 ? &(spp->s_v6) : &(spp->s_v4);
1015 	sadb_ill_df(ill, protomp, sp->sdb_of, sp->sdb_hashsize, B_FALSE);
1016 	sadb_ill_df(ill, protomp, sp->sdb_if, sp->sdb_hashsize, B_TRUE);
1017 	freemsg(protomp);
1018 }
1019 
1020 /*
1021  * Call me to free up a security association fanout.  Use the forever
1022  * variable to indicate freeing up the SAs (forever == B_FALSE, e.g.
1023  * an SADB_FLUSH message), or destroying everything (forever == B_TRUE,
1024  * when a module is unloaded).
1025  */
1026 static void
1027 sadb_destroyer(isaf_t **tablep, uint_t numentries, boolean_t forever,
1028     boolean_t inbound)
1029 {
1030 	int i;
1031 	isaf_t *table = *tablep;
1032 	uint8_t protocol;
1033 	ipsa_t *sa;
1034 	netstackid_t sid;
1035 
1036 	if (table == NULL)
1037 		return;
1038 
1039 	for (i = 0; i < numentries; i++) {
1040 		mutex_enter(&table[i].isaf_lock);
1041 		while ((sa = table[i].isaf_ipsa) != NULL) {
1042 			if (inbound && cl_inet_deletespi &&
1043 			    (sa->ipsa_state != IPSA_STATE_ACTIVE_ELSEWHERE) &&
1044 			    (sa->ipsa_state != IPSA_STATE_IDLE)) {
1045 				protocol = (sa->ipsa_type == SADB_SATYPE_AH) ?
1046 				    IPPROTO_AH : IPPROTO_ESP;
1047 				sid = sa->ipsa_netstack->netstack_stackid;
1048 				cl_inet_deletespi(sid, protocol, sa->ipsa_spi,
1049 				    NULL);
1050 			}
1051 			sadb_unlinkassoc(sa);
1052 		}
1053 		table[i].isaf_gen++;
1054 		mutex_exit(&table[i].isaf_lock);
1055 		if (forever)
1056 			mutex_destroy(&(table[i].isaf_lock));
1057 	}
1058 
1059 	if (forever) {
1060 		*tablep = NULL;
1061 		kmem_free(table, numentries * sizeof (*table));
1062 	}
1063 }
1064 
1065 /*
1066  * Entry points to sadb_destroyer().
1067  */
1068 static void
1069 sadb_flush(sadb_t *sp, netstack_t *ns)
1070 {
1071 	/*
1072 	 * Flush out each bucket, one at a time.  Were it not for keysock's
1073 	 * enforcement, there would be a subtlety where I could add on the
1074 	 * heels of a flush.  With keysock's enforcement, however, this
1075 	 * makes ESP's job easy.
1076 	 */
1077 	sadb_destroyer(&sp->sdb_of, sp->sdb_hashsize, B_FALSE, B_FALSE);
1078 	sadb_destroyer(&sp->sdb_if, sp->sdb_hashsize, B_FALSE, B_TRUE);
1079 
1080 	/* For each acquire, destroy it; leave the bucket mutex alone. */
1081 	sadb_destroy_acqlist(&sp->sdb_acq, sp->sdb_hashsize, B_FALSE, ns);
1082 }
1083 
1084 static void
1085 sadb_destroy(sadb_t *sp, netstack_t *ns)
1086 {
1087 	sadb_destroyer(&sp->sdb_of, sp->sdb_hashsize, B_TRUE, B_FALSE);
1088 	sadb_destroyer(&sp->sdb_if, sp->sdb_hashsize, B_TRUE, B_TRUE);
1089 
1090 	/* For each acquire, destroy it, including the bucket mutex. */
1091 	sadb_destroy_acqlist(&sp->sdb_acq, sp->sdb_hashsize, B_TRUE, ns);
1092 
1093 	ASSERT(sp->sdb_of == NULL);
1094 	ASSERT(sp->sdb_if == NULL);
1095 	ASSERT(sp->sdb_acq == NULL);
1096 }
1097 
1098 static void
1099 sadb_send_flush_req(sadbp_t *spp)
1100 {
1101 	mblk_t *ctl_mp;
1102 
1103 	/*
1104 	 * we've been unplumbed, or never were plumbed; don't go there.
1105 	 */
1106 	if (spp->s_ip_q == NULL)
1107 		return;
1108 
1109 	/* have IP send a flush msg to the IPsec accelerators */
1110 	ctl_mp = sadb_fmt_sa_req(DL_CO_FLUSH, spp->s_satype, NULL, B_TRUE);
1111 	if (ctl_mp != NULL)
1112 		putnext(spp->s_ip_q, ctl_mp);
1113 }
1114 
1115 void
1116 sadbp_flush(sadbp_t *spp, netstack_t *ns)
1117 {
1118 	sadb_flush(&spp->s_v4, ns);
1119 	sadb_flush(&spp->s_v6, ns);
1120 
1121 	sadb_send_flush_req(spp);
1122 }
1123 
1124 void
1125 sadbp_destroy(sadbp_t *spp, netstack_t *ns)
1126 {
1127 	sadb_destroy(&spp->s_v4, ns);
1128 	sadb_destroy(&spp->s_v6, ns);
1129 
1130 	sadb_send_flush_req(spp);
1131 	if (spp->s_satype == SADB_SATYPE_AH) {
1132 		ipsec_stack_t	*ipss = ns->netstack_ipsec;
1133 
1134 		ip_drop_unregister(&ipss->ipsec_sadb_dropper);
1135 	}
1136 }
1137 
1138 
1139 /*
1140  * Check hard vs. soft lifetimes.  If there's a reality mismatch (e.g.
1141  * soft lifetimes > hard lifetimes) return an appropriate diagnostic for
1142  * EINVAL.
1143  */
1144 int
1145 sadb_hardsoftchk(sadb_lifetime_t *hard, sadb_lifetime_t *soft,
1146     sadb_lifetime_t *idle)
1147 {
1148 	if (hard == NULL || soft == NULL)
1149 		return (0);
1150 
1151 	if (hard->sadb_lifetime_allocations != 0 &&
1152 	    soft->sadb_lifetime_allocations != 0 &&
1153 	    hard->sadb_lifetime_allocations < soft->sadb_lifetime_allocations)
1154 		return (SADB_X_DIAGNOSTIC_ALLOC_HSERR);
1155 
1156 	if (hard->sadb_lifetime_bytes != 0 &&
1157 	    soft->sadb_lifetime_bytes != 0 &&
1158 	    hard->sadb_lifetime_bytes < soft->sadb_lifetime_bytes)
1159 		return (SADB_X_DIAGNOSTIC_BYTES_HSERR);
1160 
1161 	if (hard->sadb_lifetime_addtime != 0 &&
1162 	    soft->sadb_lifetime_addtime != 0 &&
1163 	    hard->sadb_lifetime_addtime < soft->sadb_lifetime_addtime)
1164 		return (SADB_X_DIAGNOSTIC_ADDTIME_HSERR);
1165 
1166 	if (hard->sadb_lifetime_usetime != 0 &&
1167 	    soft->sadb_lifetime_usetime != 0 &&
1168 	    hard->sadb_lifetime_usetime < soft->sadb_lifetime_usetime)
1169 		return (SADB_X_DIAGNOSTIC_USETIME_HSERR);
1170 
1171 	if (idle != NULL) {
1172 		if (hard->sadb_lifetime_addtime != 0 &&
1173 		    idle->sadb_lifetime_addtime != 0 &&
1174 		    hard->sadb_lifetime_addtime < idle->sadb_lifetime_addtime)
1175 			return (SADB_X_DIAGNOSTIC_ADDTIME_HSERR);
1176 
1177 		if (soft->sadb_lifetime_addtime != 0 &&
1178 		    idle->sadb_lifetime_addtime != 0 &&
1179 		    soft->sadb_lifetime_addtime < idle->sadb_lifetime_addtime)
1180 			return (SADB_X_DIAGNOSTIC_ADDTIME_HSERR);
1181 
1182 		if (hard->sadb_lifetime_usetime != 0 &&
1183 		    idle->sadb_lifetime_usetime != 0 &&
1184 		    hard->sadb_lifetime_usetime < idle->sadb_lifetime_usetime)
1185 			return (SADB_X_DIAGNOSTIC_USETIME_HSERR);
1186 
1187 		if (soft->sadb_lifetime_usetime != 0 &&
1188 		    idle->sadb_lifetime_usetime != 0 &&
1189 		    soft->sadb_lifetime_usetime < idle->sadb_lifetime_usetime)
1190 			return (SADB_X_DIAGNOSTIC_USETIME_HSERR);
1191 	}
1192 
1193 	return (0);
1194 }
1195 
1196 /*
1197  * Clone a security association for the purposes of inserting a single SA
1198  * into inbound and outbound tables respectively. This function should only
1199  * be called from sadb_common_add().
1200  */
1201 static ipsa_t *
1202 sadb_cloneassoc(ipsa_t *ipsa)
1203 {
1204 	ipsa_t *newbie;
1205 	boolean_t error = B_FALSE;
1206 
1207 	ASSERT(MUTEX_NOT_HELD(&(ipsa->ipsa_lock)));
1208 
1209 	newbie = kmem_alloc(sizeof (ipsa_t), KM_NOSLEEP);
1210 	if (newbie == NULL)
1211 		return (NULL);
1212 
1213 	/* Copy over what we can. */
1214 	*newbie = *ipsa;
1215 
1216 	/* bzero and initialize locks, in case *_init() allocates... */
1217 	mutex_init(&newbie->ipsa_lock, NULL, MUTEX_DEFAULT, NULL);
1218 
1219 	/*
1220 	 * While somewhat dain-bramaged, the most graceful way to
1221 	 * recover from errors is to keep plowing through the
1222 	 * allocations, and getting what I can.  It's easier to call
1223 	 * sadb_freeassoc() on the stillborn clone when all the
1224 	 * pointers aren't pointing to the parent's data.
1225 	 */
1226 
1227 	if (ipsa->ipsa_authkey != NULL) {
1228 		newbie->ipsa_authkey = kmem_alloc(newbie->ipsa_authkeylen,
1229 		    KM_NOSLEEP);
1230 		if (newbie->ipsa_authkey == NULL) {
1231 			error = B_TRUE;
1232 		} else {
1233 			bcopy(ipsa->ipsa_authkey, newbie->ipsa_authkey,
1234 			    newbie->ipsa_authkeylen);
1235 
1236 			newbie->ipsa_kcfauthkey.ck_data =
1237 			    newbie->ipsa_authkey;
1238 		}
1239 
1240 		if (newbie->ipsa_amech.cm_param != NULL) {
1241 			newbie->ipsa_amech.cm_param =
1242 			    (char *)&newbie->ipsa_mac_len;
1243 		}
1244 	}
1245 
1246 	if (ipsa->ipsa_encrkey != NULL) {
1247 		newbie->ipsa_encrkey = kmem_alloc(newbie->ipsa_encrkeylen,
1248 		    KM_NOSLEEP);
1249 		if (newbie->ipsa_encrkey == NULL) {
1250 			error = B_TRUE;
1251 		} else {
1252 			bcopy(ipsa->ipsa_encrkey, newbie->ipsa_encrkey,
1253 			    newbie->ipsa_encrkeylen);
1254 
1255 			newbie->ipsa_kcfencrkey.ck_data =
1256 			    newbie->ipsa_encrkey;
1257 		}
1258 	}
1259 
1260 	newbie->ipsa_authtmpl = NULL;
1261 	newbie->ipsa_encrtmpl = NULL;
1262 	newbie->ipsa_haspeer = B_TRUE;
1263 
1264 	if (ipsa->ipsa_integ != NULL) {
1265 		newbie->ipsa_integ = kmem_alloc(newbie->ipsa_integlen,
1266 		    KM_NOSLEEP);
1267 		if (newbie->ipsa_integ == NULL) {
1268 			error = B_TRUE;
1269 		} else {
1270 			bcopy(ipsa->ipsa_integ, newbie->ipsa_integ,
1271 			    newbie->ipsa_integlen);
1272 		}
1273 	}
1274 
1275 	if (ipsa->ipsa_sens != NULL) {
1276 		newbie->ipsa_sens = kmem_alloc(newbie->ipsa_senslen,
1277 		    KM_NOSLEEP);
1278 		if (newbie->ipsa_sens == NULL) {
1279 			error = B_TRUE;
1280 		} else {
1281 			bcopy(ipsa->ipsa_sens, newbie->ipsa_sens,
1282 			    newbie->ipsa_senslen);
1283 		}
1284 	}
1285 
1286 	if (ipsa->ipsa_src_cid != NULL) {
1287 		newbie->ipsa_src_cid = ipsa->ipsa_src_cid;
1288 		IPSID_REFHOLD(ipsa->ipsa_src_cid);
1289 	}
1290 
1291 	if (ipsa->ipsa_dst_cid != NULL) {
1292 		newbie->ipsa_dst_cid = ipsa->ipsa_dst_cid;
1293 		IPSID_REFHOLD(ipsa->ipsa_dst_cid);
1294 	}
1295 
1296 	if (error) {
1297 		sadb_freeassoc(newbie);
1298 		return (NULL);
1299 	}
1300 
1301 	return (newbie);
1302 }
1303 
1304 /*
1305  * Initialize a SADB address extension at the address specified by addrext.
1306  * Return a pointer to the end of the new address extension.
1307  */
1308 static uint8_t *
1309 sadb_make_addr_ext(uint8_t *start, uint8_t *end, uint16_t exttype,
1310     sa_family_t af, uint32_t *addr, uint16_t port, uint8_t proto, int prefix)
1311 {
1312 	struct sockaddr_in *sin;
1313 	struct sockaddr_in6 *sin6;
1314 	uint8_t *cur = start;
1315 	int addrext_len;
1316 	int sin_len;
1317 	sadb_address_t *addrext	= (sadb_address_t *)cur;
1318 
1319 	if (cur == NULL)
1320 		return (NULL);
1321 
1322 	cur += sizeof (*addrext);
1323 	if (cur > end)
1324 		return (NULL);
1325 
1326 	addrext->sadb_address_proto = proto;
1327 	addrext->sadb_address_prefixlen = prefix;
1328 	addrext->sadb_address_reserved = 0;
1329 	addrext->sadb_address_exttype = exttype;
1330 
1331 	switch (af) {
1332 	case AF_INET:
1333 		sin = (struct sockaddr_in *)cur;
1334 		sin_len = sizeof (*sin);
1335 		cur += sin_len;
1336 		if (cur > end)
1337 			return (NULL);
1338 
1339 		sin->sin_family = af;
1340 		bzero(sin->sin_zero, sizeof (sin->sin_zero));
1341 		sin->sin_port = port;
1342 		IPSA_COPY_ADDR(&sin->sin_addr, addr, af);
1343 		break;
1344 	case AF_INET6:
1345 		sin6 = (struct sockaddr_in6 *)cur;
1346 		sin_len = sizeof (*sin6);
1347 		cur += sin_len;
1348 		if (cur > end)
1349 			return (NULL);
1350 
1351 		bzero(sin6, sizeof (*sin6));
1352 		sin6->sin6_family = af;
1353 		sin6->sin6_port = port;
1354 		IPSA_COPY_ADDR(&sin6->sin6_addr, addr, af);
1355 		break;
1356 	}
1357 
1358 	addrext_len = roundup(cur - start, sizeof (uint64_t));
1359 	addrext->sadb_address_len = SADB_8TO64(addrext_len);
1360 
1361 	cur = start + addrext_len;
1362 	if (cur > end)
1363 		cur = NULL;
1364 
1365 	return (cur);
1366 }
1367 
1368 /*
1369  * Construct a key management cookie extension.
1370  */
1371 
1372 static uint8_t *
1373 sadb_make_kmc_ext(uint8_t *cur, uint8_t *end, uint32_t kmp, uint32_t kmc)
1374 {
1375 	sadb_x_kmc_t *kmcext = (sadb_x_kmc_t *)cur;
1376 
1377 	if (cur == NULL)
1378 		return (NULL);
1379 
1380 	cur += sizeof (*kmcext);
1381 
1382 	if (cur > end)
1383 		return (NULL);
1384 
1385 	kmcext->sadb_x_kmc_len = SADB_8TO64(sizeof (*kmcext));
1386 	kmcext->sadb_x_kmc_exttype = SADB_X_EXT_KM_COOKIE;
1387 	kmcext->sadb_x_kmc_proto = kmp;
1388 	kmcext->sadb_x_kmc_cookie = kmc;
1389 	kmcext->sadb_x_kmc_reserved = 0;
1390 
1391 	return (cur);
1392 }
1393 
1394 /*
1395  * Given an original message header with sufficient space following it, and an
1396  * SA, construct a full PF_KEY message with all of the relevant extensions.
1397  * This is mostly used for SADB_GET, and SADB_DUMP.
1398  */
1399 static mblk_t *
1400 sadb_sa2msg(ipsa_t *ipsa, sadb_msg_t *samsg)
1401 {
1402 	int alloclen, addrsize, paddrsize, authsize, encrsize;
1403 	int srcidsize, dstidsize;
1404 	sa_family_t fam, pfam;	/* Address family for SADB_EXT_ADDRESS */
1405 				/* src/dst and proxy sockaddrs. */
1406 	/*
1407 	 * The following are pointers into the PF_KEY message this PF_KEY
1408 	 * message creates.
1409 	 */
1410 	sadb_msg_t *newsamsg;
1411 	sadb_sa_t *assoc;
1412 	sadb_lifetime_t *lt;
1413 	sadb_key_t *key;
1414 	sadb_ident_t *ident;
1415 	sadb_sens_t *sens;
1416 	sadb_ext_t *walker;	/* For when we need a generic ext. pointer. */
1417 	sadb_x_replay_ctr_t *repl_ctr;
1418 	sadb_x_pair_t *pair_ext;
1419 
1420 	mblk_t *mp;
1421 	uint64_t *bitmap;
1422 	uint8_t *cur, *end;
1423 	/* These indicate the presence of the above extension fields. */
1424 	boolean_t soft, hard, isrc, idst, auth, encr, sensinteg, srcid, dstid;
1425 	boolean_t idle;
1426 	boolean_t paired;
1427 	uint32_t otherspi;
1428 
1429 	/* First off, figure out the allocation length for this message. */
1430 
1431 	/*
1432 	 * Constant stuff.  This includes base, SA, address (src, dst),
1433 	 * and lifetime (current).
1434 	 */
1435 	alloclen = sizeof (sadb_msg_t) + sizeof (sadb_sa_t) +
1436 	    sizeof (sadb_lifetime_t);
1437 
1438 	fam = ipsa->ipsa_addrfam;
1439 	switch (fam) {
1440 	case AF_INET:
1441 		addrsize = roundup(sizeof (struct sockaddr_in) +
1442 		    sizeof (sadb_address_t), sizeof (uint64_t));
1443 		break;
1444 	case AF_INET6:
1445 		addrsize = roundup(sizeof (struct sockaddr_in6) +
1446 		    sizeof (sadb_address_t), sizeof (uint64_t));
1447 		break;
1448 	default:
1449 		return (NULL);
1450 	}
1451 	/*
1452 	 * Allocate TWO address extensions, for source and destination.
1453 	 * (Thus, the * 2.)
1454 	 */
1455 	alloclen += addrsize * 2;
1456 	if (ipsa->ipsa_flags & IPSA_F_NATT_REM)
1457 		alloclen += addrsize;
1458 	if (ipsa->ipsa_flags & IPSA_F_NATT_LOC)
1459 		alloclen += addrsize;
1460 
1461 	if (ipsa->ipsa_flags & IPSA_F_PAIRED) {
1462 		paired = B_TRUE;
1463 		alloclen += sizeof (sadb_x_pair_t);
1464 		otherspi = ipsa->ipsa_otherspi;
1465 	} else {
1466 		paired = B_FALSE;
1467 	}
1468 
1469 	/* How 'bout other lifetimes? */
1470 	if (ipsa->ipsa_softaddlt != 0 || ipsa->ipsa_softuselt != 0 ||
1471 	    ipsa->ipsa_softbyteslt != 0 || ipsa->ipsa_softalloc != 0) {
1472 		alloclen += sizeof (sadb_lifetime_t);
1473 		soft = B_TRUE;
1474 	} else {
1475 		soft = B_FALSE;
1476 	}
1477 
1478 	if (ipsa->ipsa_hardaddlt != 0 || ipsa->ipsa_harduselt != 0 ||
1479 	    ipsa->ipsa_hardbyteslt != 0 || ipsa->ipsa_hardalloc != 0) {
1480 		alloclen += sizeof (sadb_lifetime_t);
1481 		hard = B_TRUE;
1482 	} else {
1483 		hard = B_FALSE;
1484 	}
1485 
1486 	if (ipsa->ipsa_idleaddlt != 0 || ipsa->ipsa_idleuselt != 0) {
1487 		alloclen += sizeof (sadb_lifetime_t);
1488 		idle = B_TRUE;
1489 	} else {
1490 		idle = B_FALSE;
1491 	}
1492 
1493 	/* Inner addresses. */
1494 	if (ipsa->ipsa_innerfam == 0) {
1495 		isrc = B_FALSE;
1496 		idst = B_FALSE;
1497 	} else {
1498 		pfam = ipsa->ipsa_innerfam;
1499 		switch (pfam) {
1500 		case AF_INET6:
1501 			paddrsize = roundup(sizeof (struct sockaddr_in6) +
1502 			    sizeof (sadb_address_t), sizeof (uint64_t));
1503 			break;
1504 		case AF_INET:
1505 			paddrsize = roundup(sizeof (struct sockaddr_in) +
1506 			    sizeof (sadb_address_t), sizeof (uint64_t));
1507 			break;
1508 		default:
1509 			cmn_err(CE_PANIC,
1510 			    "IPsec SADB: Proxy length failure.\n");
1511 			break;
1512 		}
1513 		isrc = B_TRUE;
1514 		idst = B_TRUE;
1515 		alloclen += 2 * paddrsize;
1516 	}
1517 
1518 	/* For the following fields, assume that length != 0 ==> stuff */
1519 	if (ipsa->ipsa_authkeylen != 0) {
1520 		authsize = roundup(sizeof (sadb_key_t) + ipsa->ipsa_authkeylen,
1521 		    sizeof (uint64_t));
1522 		alloclen += authsize;
1523 		auth = B_TRUE;
1524 	} else {
1525 		auth = B_FALSE;
1526 	}
1527 
1528 	if (ipsa->ipsa_encrkeylen != 0) {
1529 		encrsize = roundup(sizeof (sadb_key_t) + ipsa->ipsa_encrkeylen,
1530 		    sizeof (uint64_t));
1531 		alloclen += encrsize;
1532 		encr = B_TRUE;
1533 	} else {
1534 		encr = B_FALSE;
1535 	}
1536 
1537 	/* No need for roundup on sens and integ. */
1538 	if (ipsa->ipsa_integlen != 0 || ipsa->ipsa_senslen != 0) {
1539 		alloclen += sizeof (sadb_key_t) + ipsa->ipsa_integlen +
1540 		    ipsa->ipsa_senslen;
1541 		sensinteg = B_TRUE;
1542 	} else {
1543 		sensinteg = B_FALSE;
1544 	}
1545 
1546 	/*
1547 	 * Must use strlen() here for lengths.	Identities use NULL
1548 	 * pointers to indicate their nonexistence.
1549 	 */
1550 	if (ipsa->ipsa_src_cid != NULL) {
1551 		srcidsize = roundup(sizeof (sadb_ident_t) +
1552 		    strlen(ipsa->ipsa_src_cid->ipsid_cid) + 1,
1553 		    sizeof (uint64_t));
1554 		alloclen += srcidsize;
1555 		srcid = B_TRUE;
1556 	} else {
1557 		srcid = B_FALSE;
1558 	}
1559 
1560 	if (ipsa->ipsa_dst_cid != NULL) {
1561 		dstidsize = roundup(sizeof (sadb_ident_t) +
1562 		    strlen(ipsa->ipsa_dst_cid->ipsid_cid) + 1,
1563 		    sizeof (uint64_t));
1564 		alloclen += dstidsize;
1565 		dstid = B_TRUE;
1566 	} else {
1567 		dstid = B_FALSE;
1568 	}
1569 
1570 	if ((ipsa->ipsa_kmp != 0) || (ipsa->ipsa_kmc != 0))
1571 		alloclen += sizeof (sadb_x_kmc_t);
1572 
1573 	if (ipsa->ipsa_replay != 0) {
1574 		alloclen += sizeof (sadb_x_replay_ctr_t);
1575 	}
1576 
1577 	/* Make sure the allocation length is a multiple of 8 bytes. */
1578 	ASSERT((alloclen & 0x7) == 0);
1579 
1580 	/* XXX Possibly make it esballoc, with a bzero-ing free_ftn. */
1581 	mp = allocb(alloclen, BPRI_HI);
1582 	if (mp == NULL)
1583 		return (NULL);
1584 
1585 	mp->b_wptr += alloclen;
1586 	end = mp->b_wptr;
1587 	newsamsg = (sadb_msg_t *)mp->b_rptr;
1588 	*newsamsg = *samsg;
1589 	newsamsg->sadb_msg_len = (uint16_t)SADB_8TO64(alloclen);
1590 
1591 	mutex_enter(&ipsa->ipsa_lock);	/* Since I'm grabbing SA fields... */
1592 
1593 	newsamsg->sadb_msg_satype = ipsa->ipsa_type;
1594 
1595 	assoc = (sadb_sa_t *)(newsamsg + 1);
1596 	assoc->sadb_sa_len = SADB_8TO64(sizeof (*assoc));
1597 	assoc->sadb_sa_exttype = SADB_EXT_SA;
1598 	assoc->sadb_sa_spi = ipsa->ipsa_spi;
1599 	assoc->sadb_sa_replay = ipsa->ipsa_replay_wsize;
1600 	assoc->sadb_sa_state = ipsa->ipsa_state;
1601 	assoc->sadb_sa_auth = ipsa->ipsa_auth_alg;
1602 	assoc->sadb_sa_encrypt = ipsa->ipsa_encr_alg;
1603 	assoc->sadb_sa_flags = ipsa->ipsa_flags;
1604 
1605 	lt = (sadb_lifetime_t *)(assoc + 1);
1606 	lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
1607 	lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT;
1608 	/* We do not support the concept. */
1609 	lt->sadb_lifetime_allocations = 0;
1610 	lt->sadb_lifetime_bytes = ipsa->ipsa_bytes;
1611 	lt->sadb_lifetime_addtime = ipsa->ipsa_addtime;
1612 	lt->sadb_lifetime_usetime = ipsa->ipsa_usetime;
1613 
1614 	if (hard) {
1615 		lt++;
1616 		lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
1617 		lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_HARD;
1618 		lt->sadb_lifetime_allocations = ipsa->ipsa_hardalloc;
1619 		lt->sadb_lifetime_bytes = ipsa->ipsa_hardbyteslt;
1620 		lt->sadb_lifetime_addtime = ipsa->ipsa_hardaddlt;
1621 		lt->sadb_lifetime_usetime = ipsa->ipsa_harduselt;
1622 	}
1623 
1624 	if (soft) {
1625 		lt++;
1626 		lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
1627 		lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_SOFT;
1628 		lt->sadb_lifetime_allocations = ipsa->ipsa_softalloc;
1629 		lt->sadb_lifetime_bytes = ipsa->ipsa_softbyteslt;
1630 		lt->sadb_lifetime_addtime = ipsa->ipsa_softaddlt;
1631 		lt->sadb_lifetime_usetime = ipsa->ipsa_softuselt;
1632 	}
1633 
1634 	if (idle) {
1635 		lt++;
1636 		lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
1637 		lt->sadb_lifetime_exttype = SADB_X_EXT_LIFETIME_IDLE;
1638 		lt->sadb_lifetime_addtime = ipsa->ipsa_idleaddlt;
1639 		lt->sadb_lifetime_usetime = ipsa->ipsa_idleuselt;
1640 	}
1641 
1642 	cur = (uint8_t *)(lt + 1);
1643 
1644 	/* NOTE:  Don't fill in ports here if we are a tunnel-mode SA. */
1645 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_SRC, fam,
1646 	    ipsa->ipsa_srcaddr, (!isrc && !idst) ? SA_SRCPORT(ipsa) : 0,
1647 	    SA_PROTO(ipsa), 0);
1648 	if (cur == NULL) {
1649 		freemsg(mp);
1650 		mp = NULL;
1651 		goto bail;
1652 	}
1653 
1654 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_DST, fam,
1655 	    ipsa->ipsa_dstaddr, (!isrc && !idst) ? SA_DSTPORT(ipsa) : 0,
1656 	    SA_PROTO(ipsa), 0);
1657 	if (cur == NULL) {
1658 		freemsg(mp);
1659 		mp = NULL;
1660 		goto bail;
1661 	}
1662 
1663 	if (ipsa->ipsa_flags & IPSA_F_NATT_LOC) {
1664 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_NATT_LOC,
1665 		    fam, &ipsa->ipsa_natt_addr_loc, ipsa->ipsa_local_nat_port,
1666 		    IPPROTO_UDP, 0);
1667 		if (cur == NULL) {
1668 			freemsg(mp);
1669 			mp = NULL;
1670 			goto bail;
1671 		}
1672 	}
1673 
1674 	if (ipsa->ipsa_flags & IPSA_F_NATT_REM) {
1675 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_NATT_REM,
1676 		    fam, &ipsa->ipsa_natt_addr_rem, ipsa->ipsa_remote_nat_port,
1677 		    IPPROTO_UDP, 0);
1678 		if (cur == NULL) {
1679 			freemsg(mp);
1680 			mp = NULL;
1681 			goto bail;
1682 		}
1683 	}
1684 
1685 	/* If we are a tunnel-mode SA, fill in the inner-selectors. */
1686 	if (isrc) {
1687 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_SRC,
1688 		    pfam, ipsa->ipsa_innersrc, SA_SRCPORT(ipsa),
1689 		    SA_IPROTO(ipsa), ipsa->ipsa_innersrcpfx);
1690 		if (cur == NULL) {
1691 			freemsg(mp);
1692 			mp = NULL;
1693 			goto bail;
1694 		}
1695 	}
1696 
1697 	if (idst) {
1698 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_DST,
1699 		    pfam, ipsa->ipsa_innerdst, SA_DSTPORT(ipsa),
1700 		    SA_IPROTO(ipsa), ipsa->ipsa_innerdstpfx);
1701 		if (cur == NULL) {
1702 			freemsg(mp);
1703 			mp = NULL;
1704 			goto bail;
1705 		}
1706 	}
1707 
1708 	if ((ipsa->ipsa_kmp != 0) || (ipsa->ipsa_kmc != 0)) {
1709 		cur = sadb_make_kmc_ext(cur, end,
1710 		    ipsa->ipsa_kmp, ipsa->ipsa_kmc);
1711 		if (cur == NULL) {
1712 			freemsg(mp);
1713 			mp = NULL;
1714 			goto bail;
1715 		}
1716 	}
1717 
1718 	walker = (sadb_ext_t *)cur;
1719 	if (auth) {
1720 		key = (sadb_key_t *)walker;
1721 		key->sadb_key_len = SADB_8TO64(authsize);
1722 		key->sadb_key_exttype = SADB_EXT_KEY_AUTH;
1723 		key->sadb_key_bits = ipsa->ipsa_authkeybits;
1724 		key->sadb_key_reserved = 0;
1725 		bcopy(ipsa->ipsa_authkey, key + 1, ipsa->ipsa_authkeylen);
1726 		walker = (sadb_ext_t *)((uint64_t *)walker +
1727 		    walker->sadb_ext_len);
1728 	}
1729 
1730 	if (encr) {
1731 		key = (sadb_key_t *)walker;
1732 		key->sadb_key_len = SADB_8TO64(encrsize);
1733 		key->sadb_key_exttype = SADB_EXT_KEY_ENCRYPT;
1734 		key->sadb_key_bits = ipsa->ipsa_encrkeybits;
1735 		key->sadb_key_reserved = 0;
1736 		bcopy(ipsa->ipsa_encrkey, key + 1, ipsa->ipsa_encrkeylen);
1737 		walker = (sadb_ext_t *)((uint64_t *)walker +
1738 		    walker->sadb_ext_len);
1739 	}
1740 
1741 	if (srcid) {
1742 		ident = (sadb_ident_t *)walker;
1743 		ident->sadb_ident_len = SADB_8TO64(srcidsize);
1744 		ident->sadb_ident_exttype = SADB_EXT_IDENTITY_SRC;
1745 		ident->sadb_ident_type = ipsa->ipsa_src_cid->ipsid_type;
1746 		ident->sadb_ident_id = 0;
1747 		ident->sadb_ident_reserved = 0;
1748 		(void) strcpy((char *)(ident + 1),
1749 		    ipsa->ipsa_src_cid->ipsid_cid);
1750 		walker = (sadb_ext_t *)((uint64_t *)walker +
1751 		    walker->sadb_ext_len);
1752 	}
1753 
1754 	if (dstid) {
1755 		ident = (sadb_ident_t *)walker;
1756 		ident->sadb_ident_len = SADB_8TO64(dstidsize);
1757 		ident->sadb_ident_exttype = SADB_EXT_IDENTITY_DST;
1758 		ident->sadb_ident_type = ipsa->ipsa_dst_cid->ipsid_type;
1759 		ident->sadb_ident_id = 0;
1760 		ident->sadb_ident_reserved = 0;
1761 		(void) strcpy((char *)(ident + 1),
1762 		    ipsa->ipsa_dst_cid->ipsid_cid);
1763 		walker = (sadb_ext_t *)((uint64_t *)walker +
1764 		    walker->sadb_ext_len);
1765 	}
1766 
1767 	if (sensinteg) {
1768 		sens = (sadb_sens_t *)walker;
1769 		sens->sadb_sens_len = SADB_8TO64(sizeof (sadb_sens_t *) +
1770 		    ipsa->ipsa_senslen + ipsa->ipsa_integlen);
1771 		sens->sadb_sens_dpd = ipsa->ipsa_dpd;
1772 		sens->sadb_sens_sens_level = ipsa->ipsa_senslevel;
1773 		sens->sadb_sens_integ_level = ipsa->ipsa_integlevel;
1774 		sens->sadb_sens_sens_len = SADB_8TO64(ipsa->ipsa_senslen);
1775 		sens->sadb_sens_integ_len = SADB_8TO64(ipsa->ipsa_integlen);
1776 		sens->sadb_sens_reserved = 0;
1777 		bitmap = (uint64_t *)(sens + 1);
1778 		if (ipsa->ipsa_sens != NULL) {
1779 			bcopy(ipsa->ipsa_sens, bitmap, ipsa->ipsa_senslen);
1780 			bitmap += sens->sadb_sens_sens_len;
1781 		}
1782 		if (ipsa->ipsa_integ != NULL)
1783 			bcopy(ipsa->ipsa_integ, bitmap, ipsa->ipsa_integlen);
1784 		walker = (sadb_ext_t *)((uint64_t *)walker +
1785 		    walker->sadb_ext_len);
1786 	}
1787 
1788 	if (paired) {
1789 		pair_ext = (sadb_x_pair_t *)walker;
1790 
1791 		pair_ext->sadb_x_pair_len = SADB_8TO64(sizeof (sadb_x_pair_t));
1792 		pair_ext->sadb_x_pair_exttype = SADB_X_EXT_PAIR;
1793 		pair_ext->sadb_x_pair_spi = otherspi;
1794 
1795 		walker = (sadb_ext_t *)((uint64_t *)walker +
1796 		    walker->sadb_ext_len);
1797 	}
1798 
1799 	if (ipsa->ipsa_replay != 0) {
1800 		repl_ctr = (sadb_x_replay_ctr_t *)walker;
1801 		repl_ctr->sadb_x_rc_len = SADB_8TO64(sizeof (*repl_ctr));
1802 		repl_ctr->sadb_x_rc_exttype = SADB_X_EXT_REPLAY_VALUE;
1803 		repl_ctr->sadb_x_rc_replay32 = ipsa->ipsa_replay;
1804 		repl_ctr->sadb_x_rc_replay64 = 0;
1805 		walker = (sadb_ext_t *)(repl_ctr + 1);
1806 	}
1807 
1808 bail:
1809 	/* Pardon any delays... */
1810 	mutex_exit(&ipsa->ipsa_lock);
1811 
1812 	return (mp);
1813 }
1814 
1815 /*
1816  * Strip out key headers or unmarked headers (SADB_EXT_KEY_*, SADB_EXT_UNKNOWN)
1817  * and adjust base message accordingly.
1818  *
1819  * Assume message is pulled up in one piece of contiguous memory.
1820  *
1821  * Say if we start off with:
1822  *
1823  * +------+----+-------------+-----------+---------------+---------------+
1824  * | base | SA | source addr | dest addr | rsrvd. or key | soft lifetime |
1825  * +------+----+-------------+-----------+---------------+---------------+
1826  *
1827  * we will end up with
1828  *
1829  * +------+----+-------------+-----------+---------------+
1830  * | base | SA | source addr | dest addr | soft lifetime |
1831  * +------+----+-------------+-----------+---------------+
1832  */
1833 static void
1834 sadb_strip(sadb_msg_t *samsg)
1835 {
1836 	sadb_ext_t *ext;
1837 	uint8_t *target = NULL;
1838 	uint8_t *msgend;
1839 	int sofar = SADB_8TO64(sizeof (*samsg));
1840 	int copylen;
1841 
1842 	ext = (sadb_ext_t *)(samsg + 1);
1843 	msgend = (uint8_t *)samsg;
1844 	msgend += SADB_64TO8(samsg->sadb_msg_len);
1845 	while ((uint8_t *)ext < msgend) {
1846 		if (ext->sadb_ext_type == SADB_EXT_RESERVED ||
1847 		    ext->sadb_ext_type == SADB_EXT_KEY_AUTH ||
1848 		    ext->sadb_ext_type == SADB_X_EXT_EDUMP ||
1849 		    ext->sadb_ext_type == SADB_EXT_KEY_ENCRYPT) {
1850 			/*
1851 			 * Aha!	 I found a header to be erased.
1852 			 */
1853 
1854 			if (target != NULL) {
1855 				/*
1856 				 * If I had a previous header to be erased,
1857 				 * copy over it.  I can get away with just
1858 				 * copying backwards because the target will
1859 				 * always be 8 bytes behind the source.
1860 				 */
1861 				copylen = ((uint8_t *)ext) - (target +
1862 				    SADB_64TO8(
1863 				    ((sadb_ext_t *)target)->sadb_ext_len));
1864 				ovbcopy(((uint8_t *)ext - copylen), target,
1865 				    copylen);
1866 				target += copylen;
1867 				((sadb_ext_t *)target)->sadb_ext_len =
1868 				    SADB_8TO64(((uint8_t *)ext) - target +
1869 				    SADB_64TO8(ext->sadb_ext_len));
1870 			} else {
1871 				target = (uint8_t *)ext;
1872 			}
1873 		} else {
1874 			sofar += ext->sadb_ext_len;
1875 		}
1876 
1877 		ext = (sadb_ext_t *)(((uint64_t *)ext) + ext->sadb_ext_len);
1878 	}
1879 
1880 	ASSERT((uint8_t *)ext == msgend);
1881 
1882 	if (target != NULL) {
1883 		copylen = ((uint8_t *)ext) - (target +
1884 		    SADB_64TO8(((sadb_ext_t *)target)->sadb_ext_len));
1885 		if (copylen != 0)
1886 			ovbcopy(((uint8_t *)ext - copylen), target, copylen);
1887 	}
1888 
1889 	/* Adjust samsg. */
1890 	samsg->sadb_msg_len = (uint16_t)sofar;
1891 
1892 	/* Assume all of the rest is cleared by caller in sadb_pfkey_echo(). */
1893 }
1894 
1895 /*
1896  * AH needs to send an error to PF_KEY.	 Assume mp points to an M_CTL
1897  * followed by an M_DATA with a PF_KEY message in it.  The serial of
1898  * the sending keysock instance is included.
1899  */
1900 void
1901 sadb_pfkey_error(queue_t *pfkey_q, mblk_t *mp, int error, int diagnostic,
1902     uint_t serial)
1903 {
1904 	mblk_t *msg = mp->b_cont;
1905 	sadb_msg_t *samsg;
1906 	keysock_out_t *kso;
1907 
1908 	/*
1909 	 * Enough functions call this to merit a NULL queue check.
1910 	 */
1911 	if (pfkey_q == NULL) {
1912 		freemsg(mp);
1913 		return;
1914 	}
1915 
1916 	ASSERT(msg != NULL);
1917 	ASSERT((mp->b_wptr - mp->b_rptr) == sizeof (ipsec_info_t));
1918 	ASSERT((msg->b_wptr - msg->b_rptr) >= sizeof (sadb_msg_t));
1919 	samsg = (sadb_msg_t *)msg->b_rptr;
1920 	kso = (keysock_out_t *)mp->b_rptr;
1921 
1922 	kso->ks_out_type = KEYSOCK_OUT;
1923 	kso->ks_out_len = sizeof (*kso);
1924 	kso->ks_out_serial = serial;
1925 
1926 	/*
1927 	 * Only send the base message up in the event of an error.
1928 	 * Don't worry about bzero()-ing, because it was probably bogus
1929 	 * anyway.
1930 	 */
1931 	msg->b_wptr = msg->b_rptr + sizeof (*samsg);
1932 	samsg = (sadb_msg_t *)msg->b_rptr;
1933 	samsg->sadb_msg_len = SADB_8TO64(sizeof (*samsg));
1934 	samsg->sadb_msg_errno = (uint8_t)error;
1935 	if (diagnostic != SADB_X_DIAGNOSTIC_PRESET)
1936 		samsg->sadb_x_msg_diagnostic = (uint16_t)diagnostic;
1937 
1938 	putnext(pfkey_q, mp);
1939 }
1940 
1941 /*
1942  * Send a successful return packet back to keysock via the queue in pfkey_q.
1943  *
1944  * Often, an SA is associated with the reply message, it's passed in if needed,
1945  * and NULL if not.  BTW, that ipsa will have its refcnt appropriately held,
1946  * and the caller will release said refcnt.
1947  */
1948 void
1949 sadb_pfkey_echo(queue_t *pfkey_q, mblk_t *mp, sadb_msg_t *samsg,
1950     keysock_in_t *ksi, ipsa_t *ipsa)
1951 {
1952 	keysock_out_t *kso;
1953 	mblk_t *mp1;
1954 	sadb_msg_t *newsamsg;
1955 	uint8_t *oldend;
1956 
1957 	ASSERT((mp->b_cont != NULL) &&
1958 	    ((void *)samsg == (void *)mp->b_cont->b_rptr) &&
1959 	    ((void *)mp->b_rptr == (void *)ksi));
1960 
1961 	switch (samsg->sadb_msg_type) {
1962 	case SADB_ADD:
1963 	case SADB_UPDATE:
1964 	case SADB_X_UPDATEPAIR:
1965 	case SADB_X_DELPAIR_STATE:
1966 	case SADB_FLUSH:
1967 	case SADB_DUMP:
1968 		/*
1969 		 * I have all of the message already.  I just need to strip
1970 		 * out the keying material and echo the message back.
1971 		 *
1972 		 * NOTE: for SADB_DUMP, the function sadb_dump() did the
1973 		 * work.  When DUMP reaches here, it should only be a base
1974 		 * message.
1975 		 */
1976 	justecho:
1977 		if (ksi->ks_in_extv[SADB_EXT_KEY_AUTH] != NULL ||
1978 		    ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT] != NULL ||
1979 		    ksi->ks_in_extv[SADB_X_EXT_EDUMP] != NULL) {
1980 			sadb_strip(samsg);
1981 			/* Assume PF_KEY message is contiguous. */
1982 			ASSERT(mp->b_cont->b_cont == NULL);
1983 			oldend = mp->b_cont->b_wptr;
1984 			mp->b_cont->b_wptr = mp->b_cont->b_rptr +
1985 			    SADB_64TO8(samsg->sadb_msg_len);
1986 			bzero(mp->b_cont->b_wptr, oldend - mp->b_cont->b_wptr);
1987 		}
1988 		break;
1989 	case SADB_GET:
1990 		/*
1991 		 * Do a lot of work here, because of the ipsa I just found.
1992 		 * First construct the new PF_KEY message, then abandon
1993 		 * the old one.
1994 		 */
1995 		mp1 = sadb_sa2msg(ipsa, samsg);
1996 		if (mp1 == NULL) {
1997 			sadb_pfkey_error(pfkey_q, mp, ENOMEM,
1998 			    SADB_X_DIAGNOSTIC_NONE, ksi->ks_in_serial);
1999 			return;
2000 		}
2001 		freemsg(mp->b_cont);
2002 		mp->b_cont = mp1;
2003 		break;
2004 	case SADB_DELETE:
2005 	case SADB_X_DELPAIR:
2006 		if (ipsa == NULL)
2007 			goto justecho;
2008 		/*
2009 		 * Because listening KMds may require more info, treat
2010 		 * DELETE like a special case of GET.
2011 		 */
2012 		mp1 = sadb_sa2msg(ipsa, samsg);
2013 		if (mp1 == NULL) {
2014 			sadb_pfkey_error(pfkey_q, mp, ENOMEM,
2015 			    SADB_X_DIAGNOSTIC_NONE, ksi->ks_in_serial);
2016 			return;
2017 		}
2018 		newsamsg = (sadb_msg_t *)mp1->b_rptr;
2019 		sadb_strip(newsamsg);
2020 		oldend = mp1->b_wptr;
2021 		mp1->b_wptr = mp1->b_rptr + SADB_64TO8(newsamsg->sadb_msg_len);
2022 		bzero(mp1->b_wptr, oldend - mp1->b_wptr);
2023 		freemsg(mp->b_cont);
2024 		mp->b_cont = mp1;
2025 		break;
2026 	default:
2027 		if (mp != NULL)
2028 			freemsg(mp);
2029 		return;
2030 	}
2031 
2032 	/* ksi is now null and void. */
2033 	kso = (keysock_out_t *)ksi;
2034 	kso->ks_out_type = KEYSOCK_OUT;
2035 	kso->ks_out_len = sizeof (*kso);
2036 	kso->ks_out_serial = ksi->ks_in_serial;
2037 	/* We're ready to send... */
2038 	putnext(pfkey_q, mp);
2039 }
2040 
2041 /*
2042  * Set up a global pfkey_q instance for AH, ESP, or some other consumer.
2043  */
2044 void
2045 sadb_keysock_hello(queue_t **pfkey_qp, queue_t *q, mblk_t *mp,
2046     void (*ager)(void *), void *agerarg, timeout_id_t *top, int satype)
2047 {
2048 	keysock_hello_ack_t *kha;
2049 	queue_t *oldq;
2050 
2051 	ASSERT(OTHERQ(q) != NULL);
2052 
2053 	/*
2054 	 * First, check atomically that I'm the first and only keysock
2055 	 * instance.
2056 	 *
2057 	 * Use OTHERQ(q), because qreply(q, mp) == putnext(OTHERQ(q), mp),
2058 	 * and I want this module to say putnext(*_pfkey_q, mp) for PF_KEY
2059 	 * messages.
2060 	 */
2061 
2062 	oldq = casptr((void **)pfkey_qp, NULL, OTHERQ(q));
2063 	if (oldq != NULL) {
2064 		ASSERT(oldq != q);
2065 		cmn_err(CE_WARN, "Danger!  Multiple keysocks on top of %s.\n",
2066 		    (satype == SADB_SATYPE_ESP)? "ESP" : "AH or other");
2067 		freemsg(mp);
2068 		return;
2069 	}
2070 
2071 	kha = (keysock_hello_ack_t *)mp->b_rptr;
2072 	kha->ks_hello_len = sizeof (keysock_hello_ack_t);
2073 	kha->ks_hello_type = KEYSOCK_HELLO_ACK;
2074 	kha->ks_hello_satype = (uint8_t)satype;
2075 
2076 	/*
2077 	 * If we made it past the casptr, then we have "exclusive" access
2078 	 * to the timeout handle.  Fire it off after the default ager
2079 	 * interval.
2080 	 */
2081 	*top = qtimeout(*pfkey_qp, ager, agerarg,
2082 	    drv_usectohz(SADB_AGE_INTERVAL_DEFAULT * 1000));
2083 
2084 	putnext(*pfkey_qp, mp);
2085 }
2086 
2087 /*
2088  * Normalize IPv4-mapped IPv6 addresses (and prefixes) as appropriate.
2089  *
2090  * Check addresses themselves for wildcard or multicast.
2091  * Check ire table for local/non-local/broadcast.
2092  */
2093 int
2094 sadb_addrcheck(queue_t *pfkey_q, mblk_t *mp, sadb_ext_t *ext, uint_t serial,
2095     netstack_t *ns)
2096 {
2097 	sadb_address_t *addr = (sadb_address_t *)ext;
2098 	struct sockaddr_in *sin;
2099 	struct sockaddr_in6 *sin6;
2100 	ire_t *ire;
2101 	int diagnostic, type;
2102 	boolean_t normalized = B_FALSE;
2103 
2104 	ASSERT(ext != NULL);
2105 	ASSERT((ext->sadb_ext_type == SADB_EXT_ADDRESS_SRC) ||
2106 	    (ext->sadb_ext_type == SADB_EXT_ADDRESS_DST) ||
2107 	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_SRC) ||
2108 	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_DST) ||
2109 	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_NATT_LOC) ||
2110 	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_NATT_REM));
2111 
2112 	/* Assign both sockaddrs, the compiler will do the right thing. */
2113 	sin = (struct sockaddr_in *)(addr + 1);
2114 	sin6 = (struct sockaddr_in6 *)(addr + 1);
2115 
2116 	if (sin6->sin6_family == AF_INET6) {
2117 		if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
2118 			/*
2119 			 * Convert to an AF_INET sockaddr.  This means the
2120 			 * return messages will have the extra space, but have
2121 			 * AF_INET sockaddrs instead of AF_INET6.
2122 			 *
2123 			 * Yes, RFC 2367 isn't clear on what to do here w.r.t.
2124 			 * mapped addresses, but since AF_INET6 ::ffff:<v4> is
2125 			 * equal to AF_INET <v4>, it shouldnt be a huge
2126 			 * problem.
2127 			 */
2128 			sin->sin_family = AF_INET;
2129 			IN6_V4MAPPED_TO_INADDR(&sin6->sin6_addr,
2130 			    &sin->sin_addr);
2131 			bzero(&sin->sin_zero, sizeof (sin->sin_zero));
2132 			normalized = B_TRUE;
2133 		}
2134 	} else if (sin->sin_family != AF_INET) {
2135 		switch (ext->sadb_ext_type) {
2136 		case SADB_EXT_ADDRESS_SRC:
2137 			diagnostic = SADB_X_DIAGNOSTIC_BAD_SRC_AF;
2138 			break;
2139 		case SADB_EXT_ADDRESS_DST:
2140 			diagnostic = SADB_X_DIAGNOSTIC_BAD_DST_AF;
2141 			break;
2142 		case SADB_X_EXT_ADDRESS_INNER_SRC:
2143 			diagnostic = SADB_X_DIAGNOSTIC_BAD_PROXY_AF;
2144 			break;
2145 		case SADB_X_EXT_ADDRESS_INNER_DST:
2146 			diagnostic = SADB_X_DIAGNOSTIC_BAD_INNER_DST_AF;
2147 			break;
2148 		case SADB_X_EXT_ADDRESS_NATT_LOC:
2149 			diagnostic = SADB_X_DIAGNOSTIC_BAD_NATT_LOC_AF;
2150 			break;
2151 		case SADB_X_EXT_ADDRESS_NATT_REM:
2152 			diagnostic = SADB_X_DIAGNOSTIC_BAD_NATT_REM_AF;
2153 			break;
2154 			/* There is no default, see above ASSERT. */
2155 		}
2156 bail:
2157 		if (pfkey_q != NULL) {
2158 			sadb_pfkey_error(pfkey_q, mp, EINVAL, diagnostic,
2159 			    serial);
2160 		} else {
2161 			/*
2162 			 * Scribble in sadb_msg that we got passed in.
2163 			 * Overload "mp" to be an sadb_msg pointer.
2164 			 */
2165 			sadb_msg_t *samsg = (sadb_msg_t *)mp;
2166 
2167 			samsg->sadb_msg_errno = EINVAL;
2168 			samsg->sadb_x_msg_diagnostic = diagnostic;
2169 		}
2170 		return (KS_IN_ADDR_UNKNOWN);
2171 	}
2172 
2173 	if (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_SRC ||
2174 	    ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_DST) {
2175 		/*
2176 		 * We need only check for prefix issues.
2177 		 */
2178 
2179 		/* Set diagnostic now, in case we need it later. */
2180 		diagnostic =
2181 		    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_SRC) ?
2182 		    SADB_X_DIAGNOSTIC_PREFIX_INNER_SRC :
2183 		    SADB_X_DIAGNOSTIC_PREFIX_INNER_DST;
2184 
2185 		if (normalized)
2186 			addr->sadb_address_prefixlen -= 96;
2187 
2188 		/*
2189 		 * Verify and mask out inner-addresses based on prefix length.
2190 		 */
2191 		if (sin->sin_family == AF_INET) {
2192 			if (addr->sadb_address_prefixlen > 32)
2193 				goto bail;
2194 			sin->sin_addr.s_addr &=
2195 			    ip_plen_to_mask(addr->sadb_address_prefixlen);
2196 		} else {
2197 			in6_addr_t mask;
2198 
2199 			ASSERT(sin->sin_family == AF_INET6);
2200 			/*
2201 			 * ip_plen_to_mask_v6() returns NULL if the value in
2202 			 * question is out of range.
2203 			 */
2204 			if (ip_plen_to_mask_v6(addr->sadb_address_prefixlen,
2205 			    &mask) == NULL)
2206 				goto bail;
2207 			sin6->sin6_addr.s6_addr32[0] &= mask.s6_addr32[0];
2208 			sin6->sin6_addr.s6_addr32[1] &= mask.s6_addr32[1];
2209 			sin6->sin6_addr.s6_addr32[2] &= mask.s6_addr32[2];
2210 			sin6->sin6_addr.s6_addr32[3] &= mask.s6_addr32[3];
2211 		}
2212 
2213 		/* We don't care in these cases. */
2214 		return (KS_IN_ADDR_DONTCARE);
2215 	}
2216 
2217 	if (sin->sin_family == AF_INET6) {
2218 		/* Check the easy ones now. */
2219 		if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
2220 			return (KS_IN_ADDR_MBCAST);
2221 		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
2222 			return (KS_IN_ADDR_UNSPEC);
2223 		/*
2224 		 * At this point, we're a unicast IPv6 address.
2225 		 *
2226 		 * A ctable lookup for local is sufficient here.  If we're
2227 		 * local, return KS_IN_ADDR_ME, otherwise KS_IN_ADDR_NOTME.
2228 		 *
2229 		 * XXX Zones alert -> me/notme decision needs to be tempered
2230 		 * by what zone we're in when we go to zone-aware IPsec.
2231 		 */
2232 		ire = ire_ctable_lookup_v6(&sin6->sin6_addr, NULL,
2233 		    IRE_LOCAL, NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE,
2234 		    ns->netstack_ip);
2235 		if (ire != NULL) {
2236 			/* Hey hey, it's local. */
2237 			IRE_REFRELE(ire);
2238 			return (KS_IN_ADDR_ME);
2239 		}
2240 	} else {
2241 		ASSERT(sin->sin_family == AF_INET);
2242 		if (sin->sin_addr.s_addr == INADDR_ANY)
2243 			return (KS_IN_ADDR_UNSPEC);
2244 		if (CLASSD(sin->sin_addr.s_addr))
2245 			return (KS_IN_ADDR_MBCAST);
2246 		/*
2247 		 * At this point we're a unicast or broadcast IPv4 address.
2248 		 *
2249 		 * Lookup on the ctable for IRE_BROADCAST or IRE_LOCAL.
2250 		 * A NULL return value is NOTME, otherwise, look at the
2251 		 * returned ire for broadcast or not and return accordingly.
2252 		 *
2253 		 * XXX Zones alert -> me/notme decision needs to be tempered
2254 		 * by what zone we're in when we go to zone-aware IPsec.
2255 		 */
2256 		ire = ire_ctable_lookup(sin->sin_addr.s_addr, 0,
2257 		    IRE_LOCAL | IRE_BROADCAST, NULL, ALL_ZONES, NULL,
2258 		    MATCH_IRE_TYPE, ns->netstack_ip);
2259 		if (ire != NULL) {
2260 			/* Check for local or broadcast */
2261 			type = ire->ire_type;
2262 			IRE_REFRELE(ire);
2263 			ASSERT(type == IRE_LOCAL || type == IRE_BROADCAST);
2264 			return ((type == IRE_LOCAL) ? KS_IN_ADDR_ME :
2265 			    KS_IN_ADDR_MBCAST);
2266 		}
2267 	}
2268 
2269 	return (KS_IN_ADDR_NOTME);
2270 }
2271 
2272 /*
2273  * Address normalizations and reality checks for inbound PF_KEY messages.
2274  *
2275  * For the case of src == unspecified AF_INET6, and dst == AF_INET, convert
2276  * the source to AF_INET.  Do the same for the inner sources.
2277  */
2278 boolean_t
2279 sadb_addrfix(keysock_in_t *ksi, queue_t *pfkey_q, mblk_t *mp, netstack_t *ns)
2280 {
2281 	struct sockaddr_in *src, *isrc;
2282 	struct sockaddr_in6 *dst, *idst;
2283 	sadb_address_t *srcext, *dstext;
2284 	uint16_t sport;
2285 	sadb_ext_t **extv = ksi->ks_in_extv;
2286 	int rc;
2287 
2288 	if (extv[SADB_EXT_ADDRESS_SRC] != NULL) {
2289 		rc = sadb_addrcheck(pfkey_q, mp, extv[SADB_EXT_ADDRESS_SRC],
2290 		    ksi->ks_in_serial, ns);
2291 		if (rc == KS_IN_ADDR_UNKNOWN)
2292 			return (B_FALSE);
2293 		if (rc == KS_IN_ADDR_MBCAST) {
2294 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2295 			    SADB_X_DIAGNOSTIC_BAD_SRC, ksi->ks_in_serial);
2296 			return (B_FALSE);
2297 		}
2298 		ksi->ks_in_srctype = rc;
2299 	}
2300 
2301 	if (extv[SADB_EXT_ADDRESS_DST] != NULL) {
2302 		rc = sadb_addrcheck(pfkey_q, mp, extv[SADB_EXT_ADDRESS_DST],
2303 		    ksi->ks_in_serial, ns);
2304 		if (rc == KS_IN_ADDR_UNKNOWN)
2305 			return (B_FALSE);
2306 		if (rc == KS_IN_ADDR_UNSPEC) {
2307 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2308 			    SADB_X_DIAGNOSTIC_BAD_DST, ksi->ks_in_serial);
2309 			return (B_FALSE);
2310 		}
2311 		ksi->ks_in_dsttype = rc;
2312 	}
2313 
2314 	/*
2315 	 * NAT-Traversal addrs are simple enough to not require all of
2316 	 * the checks in sadb_addrcheck().  Just normalize or reject if not
2317 	 * AF_INET.
2318 	 */
2319 	if (extv[SADB_X_EXT_ADDRESS_NATT_LOC] != NULL) {
2320 		rc = sadb_addrcheck(pfkey_q, mp,
2321 		    extv[SADB_X_EXT_ADDRESS_NATT_LOC], ksi->ks_in_serial, ns);
2322 
2323 		/*
2324 		 * Local NAT-T addresses never use an IRE_LOCAL, so it should
2325 		 * always be NOTME, or UNSPEC (to handle both tunnel mode
2326 		 * AND local-port flexibility).
2327 		 */
2328 		if (rc != KS_IN_ADDR_NOTME && rc != KS_IN_ADDR_UNSPEC) {
2329 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2330 			    SADB_X_DIAGNOSTIC_MALFORMED_NATT_LOC,
2331 			    ksi->ks_in_serial);
2332 			return (B_FALSE);
2333 		}
2334 		src = (struct sockaddr_in *)
2335 		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_NATT_LOC]) + 1);
2336 		if (src->sin_family != AF_INET) {
2337 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2338 			    SADB_X_DIAGNOSTIC_BAD_NATT_LOC_AF,
2339 			    ksi->ks_in_serial);
2340 			return (B_FALSE);
2341 		}
2342 	}
2343 
2344 	if (extv[SADB_X_EXT_ADDRESS_NATT_REM] != NULL) {
2345 		rc = sadb_addrcheck(pfkey_q, mp,
2346 		    extv[SADB_X_EXT_ADDRESS_NATT_REM], ksi->ks_in_serial, ns);
2347 
2348 		/*
2349 		 * Remote NAT-T addresses never use an IRE_LOCAL, so it should
2350 		 * always be NOTME, or UNSPEC if it's a tunnel-mode SA.
2351 		 */
2352 		if (rc != KS_IN_ADDR_NOTME &&
2353 		    !(extv[SADB_X_EXT_ADDRESS_INNER_SRC] != NULL &&
2354 		    rc == KS_IN_ADDR_UNSPEC)) {
2355 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2356 			    SADB_X_DIAGNOSTIC_MALFORMED_NATT_REM,
2357 			    ksi->ks_in_serial);
2358 			return (B_FALSE);
2359 		}
2360 		src = (struct sockaddr_in *)
2361 		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_NATT_REM]) + 1);
2362 		if (src->sin_family != AF_INET) {
2363 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2364 			    SADB_X_DIAGNOSTIC_BAD_NATT_REM_AF,
2365 			    ksi->ks_in_serial);
2366 			return (B_FALSE);
2367 		}
2368 	}
2369 
2370 	if (extv[SADB_X_EXT_ADDRESS_INNER_SRC] != NULL) {
2371 		if (extv[SADB_X_EXT_ADDRESS_INNER_DST] == NULL) {
2372 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2373 			    SADB_X_DIAGNOSTIC_MISSING_INNER_DST,
2374 			    ksi->ks_in_serial);
2375 			return (B_FALSE);
2376 		}
2377 
2378 		if (sadb_addrcheck(pfkey_q, mp,
2379 		    extv[SADB_X_EXT_ADDRESS_INNER_DST], ksi->ks_in_serial, ns)
2380 		    == KS_IN_ADDR_UNKNOWN ||
2381 		    sadb_addrcheck(pfkey_q, mp,
2382 		    extv[SADB_X_EXT_ADDRESS_INNER_SRC], ksi->ks_in_serial, ns)
2383 		    == KS_IN_ADDR_UNKNOWN)
2384 			return (B_FALSE);
2385 
2386 		isrc = (struct sockaddr_in *)
2387 		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_SRC]) +
2388 		    1);
2389 		idst = (struct sockaddr_in6 *)
2390 		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_DST]) +
2391 		    1);
2392 		if (isrc->sin_family != idst->sin6_family) {
2393 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2394 			    SADB_X_DIAGNOSTIC_INNER_AF_MISMATCH,
2395 			    ksi->ks_in_serial);
2396 			return (B_FALSE);
2397 		}
2398 	} else if (extv[SADB_X_EXT_ADDRESS_INNER_DST] != NULL) {
2399 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2400 			    SADB_X_DIAGNOSTIC_MISSING_INNER_SRC,
2401 			    ksi->ks_in_serial);
2402 			return (B_FALSE);
2403 	} else {
2404 		isrc = NULL;	/* For inner/outer port check below. */
2405 	}
2406 
2407 	dstext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_DST];
2408 	srcext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_SRC];
2409 
2410 	if (dstext == NULL || srcext == NULL)
2411 		return (B_TRUE);
2412 
2413 	dst = (struct sockaddr_in6 *)(dstext + 1);
2414 	src = (struct sockaddr_in *)(srcext + 1);
2415 
2416 	if (isrc != NULL &&
2417 	    (isrc->sin_port != 0 || idst->sin6_port != 0) &&
2418 	    (src->sin_port != 0 || dst->sin6_port != 0)) {
2419 		/* Can't set inner and outer ports in one SA. */
2420 		sadb_pfkey_error(pfkey_q, mp, EINVAL,
2421 		    SADB_X_DIAGNOSTIC_DUAL_PORT_SETS,
2422 		    ksi->ks_in_serial);
2423 		return (B_FALSE);
2424 	}
2425 
2426 	if (dst->sin6_family == src->sin_family)
2427 		return (B_TRUE);
2428 
2429 	if (srcext->sadb_address_proto != dstext->sadb_address_proto) {
2430 		if (srcext->sadb_address_proto == 0) {
2431 			srcext->sadb_address_proto = dstext->sadb_address_proto;
2432 		} else if (dstext->sadb_address_proto == 0) {
2433 			dstext->sadb_address_proto = srcext->sadb_address_proto;
2434 		} else {
2435 			/* Inequal protocols, neither were 0.  Report error. */
2436 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2437 			    SADB_X_DIAGNOSTIC_PROTO_MISMATCH,
2438 			    ksi->ks_in_serial);
2439 			return (B_FALSE);
2440 		}
2441 	}
2442 
2443 	/*
2444 	 * With the exception of an unspec IPv6 source and an IPv4
2445 	 * destination, address families MUST me matched.
2446 	 */
2447 	if (src->sin_family == AF_INET ||
2448 	    ksi->ks_in_srctype != KS_IN_ADDR_UNSPEC) {
2449 		sadb_pfkey_error(pfkey_q, mp, EINVAL,
2450 		    SADB_X_DIAGNOSTIC_AF_MISMATCH, ksi->ks_in_serial);
2451 		return (B_FALSE);
2452 	}
2453 
2454 	/*
2455 	 * Convert "src" to AF_INET INADDR_ANY.  We rely on sin_port being
2456 	 * in the same place for sockaddr_in and sockaddr_in6.
2457 	 */
2458 	sport = src->sin_port;
2459 	bzero(src, sizeof (*src));
2460 	src->sin_family = AF_INET;
2461 	src->sin_port = sport;
2462 
2463 	return (B_TRUE);
2464 }
2465 
2466 /*
2467  * Set the results in "addrtype", given an IRE as requested by
2468  * sadb_addrcheck().
2469  */
2470 int
2471 sadb_addrset(ire_t *ire)
2472 {
2473 	if ((ire->ire_type & IRE_BROADCAST) ||
2474 	    (ire->ire_ipversion == IPV4_VERSION && CLASSD(ire->ire_addr)) ||
2475 	    (ire->ire_ipversion == IPV6_VERSION &&
2476 	    IN6_IS_ADDR_MULTICAST(&(ire->ire_addr_v6))))
2477 		return (KS_IN_ADDR_MBCAST);
2478 	if (ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK))
2479 		return (KS_IN_ADDR_ME);
2480 	return (KS_IN_ADDR_NOTME);
2481 }
2482 
2483 
2484 /*
2485  * Walker callback function to delete sa's based on src/dst address.
2486  * Assumes that we're called with *head locked, no other locks held;
2487  * Conveniently, and not coincidentally, this is both what sadb_walker
2488  * gives us and also what sadb_unlinkassoc expects.
2489  */
2490 
2491 struct sadb_purge_state
2492 {
2493 	uint32_t *src;
2494 	uint32_t *dst;
2495 	sa_family_t af;
2496 	boolean_t inbnd;
2497 	char *sidstr;
2498 	char *didstr;
2499 	uint16_t sidtype;
2500 	uint16_t didtype;
2501 	uint32_t kmproto;
2502 	uint8_t sadb_sa_state;
2503 	mblk_t *mq;
2504 	sadb_t *sp;
2505 };
2506 
2507 static void
2508 sadb_purge_cb(isaf_t *head, ipsa_t *entry, void *cookie)
2509 {
2510 	struct sadb_purge_state *ps = (struct sadb_purge_state *)cookie;
2511 
2512 	ASSERT(MUTEX_HELD(&head->isaf_lock));
2513 
2514 	mutex_enter(&entry->ipsa_lock);
2515 
2516 	if ((entry->ipsa_state == IPSA_STATE_LARVAL) ||
2517 	    (ps->src != NULL &&
2518 	    !IPSA_ARE_ADDR_EQUAL(entry->ipsa_srcaddr, ps->src, ps->af)) ||
2519 	    (ps->dst != NULL &&
2520 	    !IPSA_ARE_ADDR_EQUAL(entry->ipsa_dstaddr, ps->dst, ps->af)) ||
2521 	    (ps->didstr != NULL && (entry->ipsa_dst_cid != NULL) &&
2522 	    !(ps->didtype == entry->ipsa_dst_cid->ipsid_type &&
2523 	    strcmp(ps->didstr, entry->ipsa_dst_cid->ipsid_cid) == 0)) ||
2524 	    (ps->sidstr != NULL && (entry->ipsa_src_cid != NULL) &&
2525 	    !(ps->sidtype == entry->ipsa_src_cid->ipsid_type &&
2526 	    strcmp(ps->sidstr, entry->ipsa_src_cid->ipsid_cid) == 0)) ||
2527 	    (ps->kmproto <= SADB_X_KMP_MAX && ps->kmproto != entry->ipsa_kmp)) {
2528 		mutex_exit(&entry->ipsa_lock);
2529 		return;
2530 	}
2531 
2532 	if (ps->inbnd) {
2533 		sadb_delete_cluster(entry);
2534 	}
2535 	entry->ipsa_state = IPSA_STATE_DEAD;
2536 	(void) sadb_torch_assoc(head, entry, ps->inbnd, &ps->mq);
2537 }
2538 
2539 /*
2540  * Common code to purge an SA with a matching src or dst address.
2541  * Don't kill larval SA's in such a purge.
2542  */
2543 int
2544 sadb_purge_sa(mblk_t *mp, keysock_in_t *ksi, sadb_t *sp, queue_t *pfkey_q,
2545     queue_t *ip_q)
2546 {
2547 	sadb_address_t *dstext =
2548 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
2549 	sadb_address_t *srcext =
2550 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
2551 	sadb_ident_t *dstid =
2552 	    (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_DST];
2553 	sadb_ident_t *srcid =
2554 	    (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_SRC];
2555 	sadb_x_kmc_t *kmc =
2556 	    (sadb_x_kmc_t *)ksi->ks_in_extv[SADB_X_EXT_KM_COOKIE];
2557 	struct sockaddr_in *src, *dst;
2558 	struct sockaddr_in6 *src6, *dst6;
2559 	struct sadb_purge_state ps;
2560 
2561 	/*
2562 	 * Don't worry about IPv6 v4-mapped addresses, sadb_addrcheck()
2563 	 * takes care of them.
2564 	 */
2565 
2566 	/* enforced by caller */
2567 	ASSERT((dstext != NULL) || (srcext != NULL));
2568 
2569 	ps.src = NULL;
2570 	ps.dst = NULL;
2571 #ifdef DEBUG
2572 	ps.af = (sa_family_t)-1;
2573 #endif
2574 	ps.mq = NULL;
2575 	ps.sidstr = NULL;
2576 	ps.didstr = NULL;
2577 	ps.kmproto = SADB_X_KMP_MAX + 1;
2578 
2579 	if (dstext != NULL) {
2580 		dst = (struct sockaddr_in *)(dstext + 1);
2581 		ps.af = dst->sin_family;
2582 		if (dst->sin_family == AF_INET6) {
2583 			dst6 = (struct sockaddr_in6 *)dst;
2584 			ps.dst = (uint32_t *)&dst6->sin6_addr;
2585 		} else {
2586 			ps.dst = (uint32_t *)&dst->sin_addr;
2587 		}
2588 	}
2589 
2590 	if (srcext != NULL) {
2591 		src = (struct sockaddr_in *)(srcext + 1);
2592 		ps.af = src->sin_family;
2593 		if (src->sin_family == AF_INET6) {
2594 			src6 = (struct sockaddr_in6 *)(srcext + 1);
2595 			ps.src = (uint32_t *)&src6->sin6_addr;
2596 		} else {
2597 			ps.src = (uint32_t *)&src->sin_addr;
2598 		}
2599 		ASSERT(dstext == NULL || src->sin_family == dst->sin_family);
2600 	}
2601 
2602 	ASSERT(ps.af != (sa_family_t)-1);
2603 
2604 	if (dstid != NULL) {
2605 		/*
2606 		 * NOTE:  May need to copy string in the future
2607 		 * if the inbound keysock message disappears for some strange
2608 		 * reason.
2609 		 */
2610 		ps.didstr = (char *)(dstid + 1);
2611 		ps.didtype = dstid->sadb_ident_type;
2612 	}
2613 
2614 	if (srcid != NULL) {
2615 		/*
2616 		 * NOTE:  May need to copy string in the future
2617 		 * if the inbound keysock message disappears for some strange
2618 		 * reason.
2619 		 */
2620 		ps.sidstr = (char *)(srcid + 1);
2621 		ps.sidtype = srcid->sadb_ident_type;
2622 	}
2623 
2624 	if (kmc != NULL)
2625 		ps.kmproto = kmc->sadb_x_kmc_proto;
2626 
2627 	/*
2628 	 * This is simple, crude, and effective.
2629 	 * Unimplemented optimizations (TBD):
2630 	 * - we can limit how many places we search based on where we
2631 	 * think the SA is filed.
2632 	 * - if we get a dst address, we can hash based on dst addr to find
2633 	 * the correct bucket in the outbound table.
2634 	 */
2635 	ps.inbnd = B_TRUE;
2636 	sadb_walker(sp->sdb_if, sp->sdb_hashsize, sadb_purge_cb, &ps);
2637 	ps.inbnd = B_FALSE;
2638 	sadb_walker(sp->sdb_of, sp->sdb_hashsize, sadb_purge_cb, &ps);
2639 
2640 	if (ps.mq != NULL)
2641 		sadb_drain_torchq(ip_q, ps.mq);
2642 
2643 	ASSERT(mp->b_cont != NULL);
2644 	sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)mp->b_cont->b_rptr, ksi,
2645 	    NULL);
2646 	return (0);
2647 }
2648 
2649 static void
2650 sadb_delpair_state(isaf_t *head, ipsa_t *entry, void *cookie)
2651 {
2652 	struct sadb_purge_state *ps = (struct sadb_purge_state *)cookie;
2653 	isaf_t  *inbound_bucket;
2654 	ipsa_t *peer_assoc;
2655 
2656 	ASSERT(MUTEX_HELD(&head->isaf_lock));
2657 
2658 	mutex_enter(&entry->ipsa_lock);
2659 
2660 	if ((entry->ipsa_state != ps->sadb_sa_state) ||
2661 	    ((ps->src != NULL) &&
2662 	    !IPSA_ARE_ADDR_EQUAL(entry->ipsa_srcaddr, ps->src, ps->af))) {
2663 		mutex_exit(&entry->ipsa_lock);
2664 		return;
2665 	}
2666 
2667 	/*
2668 	 * The isaf_t *, which is passed in , is always an outbound bucket,
2669 	 * and we are preserving the outbound-then-inbound hash-bucket lock
2670 	 * ordering. The sadb_walker() which triggers this function is called
2671 	 * only on the outbound fanout, and the corresponding inbound bucket
2672 	 * lock is safe to acquire here.
2673 	 */
2674 
2675 	if (entry->ipsa_haspeer) {
2676 		inbound_bucket = INBOUND_BUCKET(ps->sp, entry->ipsa_spi);
2677 		mutex_enter(&inbound_bucket->isaf_lock);
2678 		peer_assoc = ipsec_getassocbyspi(inbound_bucket,
2679 		    entry->ipsa_spi, entry->ipsa_srcaddr,
2680 		    entry->ipsa_dstaddr, entry->ipsa_addrfam);
2681 	} else {
2682 		inbound_bucket = INBOUND_BUCKET(ps->sp, entry->ipsa_otherspi);
2683 		mutex_enter(&inbound_bucket->isaf_lock);
2684 		peer_assoc = ipsec_getassocbyspi(inbound_bucket,
2685 		    entry->ipsa_otherspi, entry->ipsa_dstaddr,
2686 		    entry->ipsa_srcaddr, entry->ipsa_addrfam);
2687 	}
2688 
2689 	entry->ipsa_state = IPSA_STATE_DEAD;
2690 	(void) sadb_torch_assoc(head, entry, B_FALSE, &ps->mq);
2691 	if (peer_assoc != NULL) {
2692 		mutex_enter(&peer_assoc->ipsa_lock);
2693 		peer_assoc->ipsa_state = IPSA_STATE_DEAD;
2694 		(void) sadb_torch_assoc(inbound_bucket, peer_assoc,
2695 		    B_FALSE, &ps->mq);
2696 	}
2697 	mutex_exit(&inbound_bucket->isaf_lock);
2698 }
2699 
2700 /*
2701  * Common code to delete/get an SA.
2702  */
2703 int
2704 sadb_delget_sa(mblk_t *mp, keysock_in_t *ksi, sadbp_t *spp,
2705     int *diagnostic, queue_t *pfkey_q, uint8_t sadb_msg_type)
2706 {
2707 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
2708 	sadb_address_t *srcext =
2709 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
2710 	sadb_address_t *dstext =
2711 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
2712 	ipsa_t *echo_target = NULL;
2713 	ipsap_t *ipsapp;
2714 	mblk_t *torchq = NULL;
2715 	uint_t	error = 0;
2716 
2717 	if (assoc == NULL) {
2718 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
2719 		return (EINVAL);
2720 	}
2721 
2722 	if (sadb_msg_type == SADB_X_DELPAIR_STATE) {
2723 		struct sockaddr_in *src;
2724 		struct sockaddr_in6 *src6;
2725 		struct sadb_purge_state ps;
2726 
2727 		if (srcext == NULL) {
2728 			*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
2729 			return (EINVAL);
2730 		}
2731 		ps.src = NULL;
2732 		ps.mq = NULL;
2733 		src = (struct sockaddr_in *)(srcext + 1);
2734 		ps.af = src->sin_family;
2735 		if (src->sin_family == AF_INET6) {
2736 			src6 = (struct sockaddr_in6 *)(srcext + 1);
2737 			ps.src = (uint32_t *)&src6->sin6_addr;
2738 			ps.sp = &spp->s_v6;
2739 		} else {
2740 			ps.src = (uint32_t *)&src->sin_addr;
2741 			ps.sp = &spp->s_v4;
2742 		}
2743 		ps.inbnd = B_FALSE;
2744 		ps.sadb_sa_state = assoc->sadb_sa_state;
2745 		sadb_walker(ps.sp->sdb_of, ps.sp->sdb_hashsize,
2746 		    sadb_delpair_state, &ps);
2747 
2748 		if (ps.mq != NULL)
2749 			sadb_drain_torchq(pfkey_q, ps.mq);
2750 
2751 		ASSERT(mp->b_cont != NULL);
2752 		sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)mp->b_cont->b_rptr,
2753 		    ksi, NULL);
2754 		return (0);
2755 	}
2756 
2757 	if (dstext == NULL) {
2758 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
2759 		return (EINVAL);
2760 	}
2761 
2762 	ipsapp = get_ipsa_pair(assoc, srcext, dstext, spp);
2763 	if (ipsapp == NULL) {
2764 		*diagnostic = SADB_X_DIAGNOSTIC_SA_NOTFOUND;
2765 		return (ESRCH);
2766 	}
2767 
2768 	echo_target = ipsapp->ipsap_sa_ptr;
2769 	if (echo_target == NULL)
2770 		echo_target = ipsapp->ipsap_psa_ptr;
2771 
2772 	if (sadb_msg_type == SADB_DELETE || sadb_msg_type == SADB_X_DELPAIR) {
2773 		/*
2774 		 * Bucket locks will be required if SA is actually unlinked.
2775 		 * get_ipsa_pair() returns valid hash bucket pointers even
2776 		 * if it can't find a pair SA pointer. To prevent a potential
2777 		 * deadlock, always lock the outbound bucket before the inbound.
2778 		 */
2779 		if (ipsapp->in_inbound_table) {
2780 			mutex_enter(&ipsapp->ipsap_pbucket->isaf_lock);
2781 			mutex_enter(&ipsapp->ipsap_bucket->isaf_lock);
2782 		} else {
2783 			mutex_enter(&ipsapp->ipsap_bucket->isaf_lock);
2784 			mutex_enter(&ipsapp->ipsap_pbucket->isaf_lock);
2785 		}
2786 
2787 		if (ipsapp->ipsap_sa_ptr != NULL) {
2788 			mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
2789 			if (ipsapp->ipsap_sa_ptr->ipsa_flags & IPSA_F_INBOUND) {
2790 				sadb_delete_cluster(ipsapp->ipsap_sa_ptr);
2791 			}
2792 			ipsapp->ipsap_sa_ptr->ipsa_state = IPSA_STATE_DEAD;
2793 			(void) sadb_torch_assoc(ipsapp->ipsap_bucket,
2794 			    ipsapp->ipsap_sa_ptr, B_FALSE, &torchq);
2795 			/*
2796 			 * sadb_torch_assoc() releases the ipsa_lock
2797 			 * and calls sadb_unlinkassoc() which does a
2798 			 * IPSA_REFRELE.
2799 			 */
2800 		}
2801 		if (ipsapp->ipsap_psa_ptr != NULL) {
2802 			mutex_enter(&ipsapp->ipsap_psa_ptr->ipsa_lock);
2803 			if (sadb_msg_type == SADB_X_DELPAIR ||
2804 			    ipsapp->ipsap_psa_ptr->ipsa_haspeer) {
2805 				if (ipsapp->ipsap_psa_ptr->ipsa_flags &
2806 				    IPSA_F_INBOUND) {
2807 					sadb_delete_cluster(
2808 					    ipsapp->ipsap_psa_ptr);
2809 				}
2810 				ipsapp->ipsap_psa_ptr->ipsa_state =
2811 				    IPSA_STATE_DEAD;
2812 				(void) sadb_torch_assoc(ipsapp->ipsap_pbucket,
2813 				    ipsapp->ipsap_psa_ptr, B_FALSE, &torchq);
2814 			} else {
2815 				/*
2816 				 * Only half of the "pair" has been deleted.
2817 				 * Update the remaining SA and remove references
2818 				 * to its pair SA, which is now gone.
2819 				 */
2820 				ipsapp->ipsap_psa_ptr->ipsa_otherspi = 0;
2821 				ipsapp->ipsap_psa_ptr->ipsa_flags &=
2822 				    ~IPSA_F_PAIRED;
2823 				mutex_exit(&ipsapp->ipsap_psa_ptr->ipsa_lock);
2824 			}
2825 		} else if (sadb_msg_type == SADB_X_DELPAIR) {
2826 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_SA_NOTFOUND;
2827 			error = ESRCH;
2828 		}
2829 		mutex_exit(&ipsapp->ipsap_bucket->isaf_lock);
2830 		mutex_exit(&ipsapp->ipsap_pbucket->isaf_lock);
2831 	}
2832 
2833 	if (torchq != NULL)
2834 		sadb_drain_torchq(spp->s_ip_q, torchq);
2835 
2836 	ASSERT(mp->b_cont != NULL);
2837 
2838 	if (error == 0)
2839 		sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)
2840 		    mp->b_cont->b_rptr, ksi, echo_target);
2841 
2842 	destroy_ipsa_pair(ipsapp);
2843 
2844 	return (error);
2845 }
2846 
2847 /*
2848  * This function takes a sadb_sa_t and finds the ipsa_t structure
2849  * and the isaf_t (hash bucket) that it's stored under. If the security
2850  * association has a peer, the ipsa_t structure and bucket for that security
2851  * association are also searched for. The "pair" of ipsa_t's and isaf_t's
2852  * are returned as a ipsap_t.
2853  *
2854  * The hash buckets are returned for convenience, if the calling function
2855  * needs to use the hash bucket locks, say to remove the SA's, it should
2856  * take care to observe the convention of locking outbound bucket then
2857  * inbound bucket. The flag in_inbound_table provides direction.
2858  *
2859  * Note that a "pair" is defined as one (but not both) of the following:
2860  *
2861  * A security association which has a soft reference to another security
2862  * association via its SPI.
2863  *
2864  * A security association that is not obviously "inbound" or "outbound" so
2865  * it appears in both hash tables, the "peer" being the same security
2866  * association in the other hash table.
2867  *
2868  * This function will return NULL if the ipsa_t can't be found in the
2869  * inbound or outbound  hash tables (not found). If only one ipsa_t is
2870  * found, the pair ipsa_t will be NULL. Both isaf_t values are valid
2871  * provided at least one ipsa_t is found.
2872  */
2873 ipsap_t *
2874 get_ipsa_pair(sadb_sa_t *assoc, sadb_address_t *srcext, sadb_address_t *dstext,
2875     sadbp_t *spp)
2876 {
2877 	struct sockaddr_in *src, *dst;
2878 	struct sockaddr_in6 *src6, *dst6;
2879 	sadb_t *sp;
2880 	uint32_t *srcaddr, *dstaddr;
2881 	isaf_t *outbound_bucket, *inbound_bucket;
2882 	ipsap_t *ipsapp;
2883 	sa_family_t af;
2884 
2885 	uint32_t pair_srcaddr[IPSA_MAX_ADDRLEN];
2886 	uint32_t pair_dstaddr[IPSA_MAX_ADDRLEN];
2887 	uint32_t pair_spi;
2888 
2889 	ipsapp = kmem_zalloc(sizeof (*ipsapp), KM_NOSLEEP);
2890 	if (ipsapp == NULL)
2891 		return (NULL);
2892 
2893 	ipsapp->in_inbound_table = B_FALSE;
2894 
2895 	/*
2896 	 * Don't worry about IPv6 v4-mapped addresses, sadb_addrcheck()
2897 	 * takes care of them.
2898 	 */
2899 
2900 	dst = (struct sockaddr_in *)(dstext + 1);
2901 	af = dst->sin_family;
2902 	if (af == AF_INET6) {
2903 		sp = &spp->s_v6;
2904 		dst6 = (struct sockaddr_in6 *)dst;
2905 		dstaddr = (uint32_t *)&dst6->sin6_addr;
2906 		if (srcext != NULL) {
2907 			src6 = (struct sockaddr_in6 *)(srcext + 1);
2908 			srcaddr = (uint32_t *)&src6->sin6_addr;
2909 			ASSERT(src6->sin6_family == af);
2910 			ASSERT(src6->sin6_family == AF_INET6);
2911 		} else {
2912 			srcaddr = ALL_ZEROES_PTR;
2913 		}
2914 		outbound_bucket = OUTBOUND_BUCKET_V6(sp,
2915 		    *(uint32_t *)dstaddr);
2916 	} else {
2917 		sp = &spp->s_v4;
2918 		dstaddr = (uint32_t *)&dst->sin_addr;
2919 		if (srcext != NULL) {
2920 			src = (struct sockaddr_in *)(srcext + 1);
2921 			srcaddr = (uint32_t *)&src->sin_addr;
2922 			ASSERT(src->sin_family == af);
2923 			ASSERT(src->sin_family == AF_INET);
2924 		} else {
2925 			srcaddr = ALL_ZEROES_PTR;
2926 		}
2927 		outbound_bucket = OUTBOUND_BUCKET_V4(sp,
2928 		    *(uint32_t *)dstaddr);
2929 	}
2930 
2931 	inbound_bucket = INBOUND_BUCKET(sp, assoc->sadb_sa_spi);
2932 
2933 	/* Lock down both buckets. */
2934 	mutex_enter(&outbound_bucket->isaf_lock);
2935 	mutex_enter(&inbound_bucket->isaf_lock);
2936 
2937 	if (assoc->sadb_sa_flags & IPSA_F_INBOUND) {
2938 		ipsapp->ipsap_sa_ptr = ipsec_getassocbyspi(inbound_bucket,
2939 		    assoc->sadb_sa_spi, srcaddr, dstaddr, af);
2940 		if (ipsapp->ipsap_sa_ptr != NULL) {
2941 			ipsapp->ipsap_bucket = inbound_bucket;
2942 			ipsapp->ipsap_pbucket = outbound_bucket;
2943 			ipsapp->in_inbound_table = B_TRUE;
2944 		} else {
2945 			ipsapp->ipsap_sa_ptr =
2946 			    ipsec_getassocbyspi(outbound_bucket,
2947 			    assoc->sadb_sa_spi, srcaddr, dstaddr, af);
2948 			ipsapp->ipsap_bucket = outbound_bucket;
2949 			ipsapp->ipsap_pbucket = inbound_bucket;
2950 		}
2951 	} else {
2952 		/* IPSA_F_OUTBOUND is set *or* no directions flags set. */
2953 		ipsapp->ipsap_sa_ptr =
2954 		    ipsec_getassocbyspi(outbound_bucket,
2955 		    assoc->sadb_sa_spi, srcaddr, dstaddr, af);
2956 		if (ipsapp->ipsap_sa_ptr != NULL) {
2957 			ipsapp->ipsap_bucket = outbound_bucket;
2958 			ipsapp->ipsap_pbucket = inbound_bucket;
2959 		} else {
2960 			ipsapp->ipsap_sa_ptr =
2961 			    ipsec_getassocbyspi(inbound_bucket,
2962 			    assoc->sadb_sa_spi, srcaddr, dstaddr, af);
2963 			ipsapp->ipsap_bucket = inbound_bucket;
2964 			ipsapp->ipsap_pbucket = outbound_bucket;
2965 			if (ipsapp->ipsap_sa_ptr != NULL)
2966 				ipsapp->in_inbound_table = B_TRUE;
2967 		}
2968 	}
2969 
2970 	if (ipsapp->ipsap_sa_ptr == NULL) {
2971 		mutex_exit(&outbound_bucket->isaf_lock);
2972 		mutex_exit(&inbound_bucket->isaf_lock);
2973 		kmem_free(ipsapp, sizeof (*ipsapp));
2974 		return (NULL);
2975 	}
2976 
2977 	if ((ipsapp->ipsap_sa_ptr->ipsa_state == IPSA_STATE_LARVAL) &&
2978 	    ipsapp->in_inbound_table) {
2979 		mutex_exit(&outbound_bucket->isaf_lock);
2980 		mutex_exit(&inbound_bucket->isaf_lock);
2981 		return (ipsapp);
2982 	}
2983 
2984 	mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
2985 	if (ipsapp->ipsap_sa_ptr->ipsa_haspeer) {
2986 		/*
2987 		 * haspeer implies no sa_pairing, look for same spi
2988 		 * in other hashtable.
2989 		 */
2990 		ipsapp->ipsap_psa_ptr =
2991 		    ipsec_getassocbyspi(ipsapp->ipsap_pbucket,
2992 		    assoc->sadb_sa_spi, srcaddr, dstaddr, af);
2993 		mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
2994 		mutex_exit(&outbound_bucket->isaf_lock);
2995 		mutex_exit(&inbound_bucket->isaf_lock);
2996 		return (ipsapp);
2997 	}
2998 	pair_spi = ipsapp->ipsap_sa_ptr->ipsa_otherspi;
2999 	IPSA_COPY_ADDR(&pair_srcaddr,
3000 	    ipsapp->ipsap_sa_ptr->ipsa_srcaddr, af);
3001 	IPSA_COPY_ADDR(&pair_dstaddr,
3002 	    ipsapp->ipsap_sa_ptr->ipsa_dstaddr, af);
3003 	mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
3004 	mutex_exit(&outbound_bucket->isaf_lock);
3005 	mutex_exit(&inbound_bucket->isaf_lock);
3006 
3007 	if (pair_spi == 0) {
3008 		ASSERT(ipsapp->ipsap_bucket != NULL);
3009 		ASSERT(ipsapp->ipsap_pbucket != NULL);
3010 		return (ipsapp);
3011 	}
3012 
3013 	/* found sa in outbound sadb, peer should be inbound */
3014 
3015 	if (ipsapp->in_inbound_table) {
3016 		/* Found SA in inbound table, pair will be in outbound. */
3017 		if (af == AF_INET6) {
3018 			ipsapp->ipsap_pbucket = OUTBOUND_BUCKET_V6(sp,
3019 			    *(uint32_t *)pair_srcaddr);
3020 		} else {
3021 			ipsapp->ipsap_pbucket = OUTBOUND_BUCKET_V4(sp,
3022 			    *(uint32_t *)pair_srcaddr);
3023 		}
3024 	} else {
3025 		ipsapp->ipsap_pbucket = INBOUND_BUCKET(sp, pair_spi);
3026 	}
3027 	mutex_enter(&ipsapp->ipsap_pbucket->isaf_lock);
3028 	ipsapp->ipsap_psa_ptr = ipsec_getassocbyspi(ipsapp->ipsap_pbucket,
3029 	    pair_spi, pair_dstaddr, pair_srcaddr, af);
3030 	mutex_exit(&ipsapp->ipsap_pbucket->isaf_lock);
3031 	ASSERT(ipsapp->ipsap_bucket != NULL);
3032 	ASSERT(ipsapp->ipsap_pbucket != NULL);
3033 	return (ipsapp);
3034 }
3035 
3036 /*
3037  * Initialize the mechanism parameters associated with an SA.
3038  * These parameters can be shared by multiple packets, which saves
3039  * us from the overhead of consulting the algorithm table for
3040  * each packet.
3041  */
3042 static void
3043 sadb_init_alginfo(ipsa_t *sa)
3044 {
3045 	ipsec_alginfo_t *alg;
3046 	ipsec_stack_t	*ipss = sa->ipsa_netstack->netstack_ipsec;
3047 
3048 	mutex_enter(&ipss->ipsec_alg_lock);
3049 
3050 	if (sa->ipsa_encrkey != NULL) {
3051 		alg = ipss->ipsec_alglists[IPSEC_ALG_ENCR][sa->ipsa_encr_alg];
3052 		if (alg != NULL && ALG_VALID(alg)) {
3053 			sa->ipsa_emech.cm_type = alg->alg_mech_type;
3054 			sa->ipsa_emech.cm_param = NULL;
3055 			sa->ipsa_emech.cm_param_len = 0;
3056 			sa->ipsa_iv_len = alg->alg_datalen;
3057 		} else
3058 			sa->ipsa_emech.cm_type = CRYPTO_MECHANISM_INVALID;
3059 	}
3060 
3061 	if (sa->ipsa_authkey != NULL) {
3062 		alg = ipss->ipsec_alglists[IPSEC_ALG_AUTH][sa->ipsa_auth_alg];
3063 		if (alg != NULL && ALG_VALID(alg)) {
3064 			sa->ipsa_amech.cm_type = alg->alg_mech_type;
3065 			sa->ipsa_amech.cm_param = (char *)&sa->ipsa_mac_len;
3066 			sa->ipsa_amech.cm_param_len = sizeof (size_t);
3067 			sa->ipsa_mac_len = (size_t)alg->alg_datalen;
3068 		} else
3069 			sa->ipsa_amech.cm_type = CRYPTO_MECHANISM_INVALID;
3070 	}
3071 
3072 	mutex_exit(&ipss->ipsec_alg_lock);
3073 }
3074 
3075 /*
3076  * Perform NAT-traversal cached checksum offset calculations here.
3077  */
3078 static void
3079 sadb_nat_calculations(ipsa_t *newbie, sadb_address_t *natt_loc_ext,
3080     sadb_address_t *natt_rem_ext, uint32_t *src_addr_ptr,
3081     uint32_t *dst_addr_ptr)
3082 {
3083 	struct sockaddr_in *natt_loc, *natt_rem;
3084 	uint32_t *natt_loc_ptr = NULL, *natt_rem_ptr = NULL;
3085 	uint32_t running_sum = 0;
3086 
3087 #define	DOWN_SUM(x) (x) = ((x) & 0xFFFF) +	 ((x) >> 16)
3088 
3089 	if (natt_rem_ext != NULL) {
3090 		uint32_t l_src;
3091 		uint32_t l_rem;
3092 
3093 		natt_rem = (struct sockaddr_in *)(natt_rem_ext + 1);
3094 
3095 		/* Ensured by sadb_addrfix(). */
3096 		ASSERT(natt_rem->sin_family == AF_INET);
3097 
3098 		natt_rem_ptr = (uint32_t *)(&natt_rem->sin_addr);
3099 		newbie->ipsa_remote_nat_port = natt_rem->sin_port;
3100 		l_src = *src_addr_ptr;
3101 		l_rem = *natt_rem_ptr;
3102 
3103 		/* Instead of IPSA_COPY_ADDR(), just copy first 32 bits. */
3104 		newbie->ipsa_natt_addr_rem = *natt_rem_ptr;
3105 
3106 		l_src = ntohl(l_src);
3107 		DOWN_SUM(l_src);
3108 		DOWN_SUM(l_src);
3109 		l_rem = ntohl(l_rem);
3110 		DOWN_SUM(l_rem);
3111 		DOWN_SUM(l_rem);
3112 
3113 		/*
3114 		 * We're 1's complement for checksums, so check for wraparound
3115 		 * here.
3116 		 */
3117 		if (l_rem > l_src)
3118 			l_src--;
3119 
3120 		running_sum += l_src - l_rem;
3121 
3122 		DOWN_SUM(running_sum);
3123 		DOWN_SUM(running_sum);
3124 	}
3125 
3126 	if (natt_loc_ext != NULL) {
3127 		natt_loc = (struct sockaddr_in *)(natt_loc_ext + 1);
3128 
3129 		/* Ensured by sadb_addrfix(). */
3130 		ASSERT(natt_loc->sin_family == AF_INET);
3131 
3132 		natt_loc_ptr = (uint32_t *)(&natt_loc->sin_addr);
3133 		newbie->ipsa_local_nat_port = natt_loc->sin_port;
3134 
3135 		/* Instead of IPSA_COPY_ADDR(), just copy first 32 bits. */
3136 		newbie->ipsa_natt_addr_loc = *natt_loc_ptr;
3137 
3138 		/*
3139 		 * NAT-T port agility means we may have natt_loc_ext, but
3140 		 * only for a local-port change.
3141 		 */
3142 		if (natt_loc->sin_addr.s_addr != INADDR_ANY) {
3143 			uint32_t l_dst = ntohl(*dst_addr_ptr);
3144 			uint32_t l_loc = ntohl(*natt_loc_ptr);
3145 
3146 			DOWN_SUM(l_loc);
3147 			DOWN_SUM(l_loc);
3148 			DOWN_SUM(l_dst);
3149 			DOWN_SUM(l_dst);
3150 
3151 			/*
3152 			 * We're 1's complement for checksums, so check for
3153 			 * wraparound here.
3154 			 */
3155 			if (l_loc > l_dst)
3156 				l_dst--;
3157 
3158 			running_sum += l_dst - l_loc;
3159 			DOWN_SUM(running_sum);
3160 			DOWN_SUM(running_sum);
3161 		}
3162 	}
3163 
3164 	newbie->ipsa_inbound_cksum = running_sum;
3165 #undef DOWN_SUM
3166 }
3167 
3168 /*
3169  * This function is called from consumers that need to insert a fully-grown
3170  * security association into its tables.  This function takes into account that
3171  * SAs can be "inbound", "outbound", or "both".	 The "primary" and "secondary"
3172  * hash bucket parameters are set in order of what the SA will be most of the
3173  * time.  (For example, an SA with an unspecified source, and a multicast
3174  * destination will primarily be an outbound SA.  OTOH, if that destination
3175  * is unicast for this node, then the SA will primarily be inbound.)
3176  *
3177  * It takes a lot of parameters because even if clone is B_FALSE, this needs
3178  * to check both buckets for purposes of collision.
3179  *
3180  * Return 0 upon success.  Return various errnos (ENOMEM, EEXIST) for
3181  * various error conditions.  We may need to set samsg->sadb_x_msg_diagnostic
3182  * with additional diagnostic information because there is at least one EINVAL
3183  * case here.
3184  */
3185 int
3186 sadb_common_add(queue_t *ip_q, queue_t *pfkey_q, mblk_t *mp, sadb_msg_t *samsg,
3187     keysock_in_t *ksi, isaf_t *primary, isaf_t *secondary,
3188     ipsa_t *newbie, boolean_t clone, boolean_t is_inbound, int *diagnostic,
3189     netstack_t *ns, sadbp_t *spp)
3190 {
3191 	ipsa_t *newbie_clone = NULL, *scratch;
3192 	ipsap_t *ipsapp = NULL;
3193 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
3194 	sadb_address_t *srcext =
3195 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
3196 	sadb_address_t *dstext =
3197 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
3198 	sadb_address_t *isrcext =
3199 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_SRC];
3200 	sadb_address_t *idstext =
3201 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_DST];
3202 	sadb_x_kmc_t *kmcext =
3203 	    (sadb_x_kmc_t *)ksi->ks_in_extv[SADB_X_EXT_KM_COOKIE];
3204 	sadb_key_t *akey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_AUTH];
3205 	sadb_key_t *ekey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT];
3206 	sadb_x_pair_t *pair_ext =
3207 	    (sadb_x_pair_t *)ksi->ks_in_extv[SADB_X_EXT_PAIR];
3208 	sadb_x_replay_ctr_t *replayext =
3209 	    (sadb_x_replay_ctr_t *)ksi->ks_in_extv[SADB_X_EXT_REPLAY_VALUE];
3210 	uint8_t protocol =
3211 	    (samsg->sadb_msg_satype == SADB_SATYPE_AH) ? IPPROTO_AH:IPPROTO_ESP;
3212 #if 0
3213 	/*
3214 	 * XXXMLS - When Trusted Solaris or Multi-Level Secure functionality
3215 	 * comes to ON, examine these if 0'ed fragments.  Look for XXXMLS.
3216 	 */
3217 	sadb_sens_t *sens = (sadb_sens_t *);
3218 #endif
3219 	struct sockaddr_in *src, *dst, *isrc, *idst;
3220 	struct sockaddr_in6 *src6, *dst6, *isrc6, *idst6;
3221 	sadb_lifetime_t *soft =
3222 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_SOFT];
3223 	sadb_lifetime_t *hard =
3224 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_HARD];
3225 	sadb_lifetime_t	*idle =
3226 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_X_EXT_LIFETIME_IDLE];
3227 	sa_family_t af;
3228 	int error = 0;
3229 	boolean_t isupdate = (newbie != NULL);
3230 	uint32_t *src_addr_ptr, *dst_addr_ptr, *isrc_addr_ptr, *idst_addr_ptr;
3231 	mblk_t *ctl_mp = NULL;
3232 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
3233 	int		rcode;
3234 
3235 	if (srcext == NULL) {
3236 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
3237 		return (EINVAL);
3238 	}
3239 	if (dstext == NULL) {
3240 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
3241 		return (EINVAL);
3242 	}
3243 	if (assoc == NULL) {
3244 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
3245 		return (EINVAL);
3246 	}
3247 
3248 	src = (struct sockaddr_in *)(srcext + 1);
3249 	src6 = (struct sockaddr_in6 *)(srcext + 1);
3250 	dst = (struct sockaddr_in *)(dstext + 1);
3251 	dst6 = (struct sockaddr_in6 *)(dstext + 1);
3252 	if (isrcext != NULL) {
3253 		isrc = (struct sockaddr_in *)(isrcext + 1);
3254 		isrc6 = (struct sockaddr_in6 *)(isrcext + 1);
3255 		ASSERT(idstext != NULL);
3256 		idst = (struct sockaddr_in *)(idstext + 1);
3257 		idst6 = (struct sockaddr_in6 *)(idstext + 1);
3258 	} else {
3259 		isrc = NULL;
3260 		isrc6 = NULL;
3261 	}
3262 
3263 	af = src->sin_family;
3264 
3265 	if (af == AF_INET) {
3266 		src_addr_ptr = (uint32_t *)&src->sin_addr;
3267 		dst_addr_ptr = (uint32_t *)&dst->sin_addr;
3268 	} else {
3269 		ASSERT(af == AF_INET6);
3270 		src_addr_ptr = (uint32_t *)&src6->sin6_addr;
3271 		dst_addr_ptr = (uint32_t *)&dst6->sin6_addr;
3272 	}
3273 
3274 	if (!isupdate && (clone == B_TRUE || is_inbound == B_TRUE) &&
3275 	    cl_inet_checkspi &&
3276 	    (assoc->sadb_sa_state != SADB_X_SASTATE_ACTIVE_ELSEWHERE)) {
3277 		rcode = cl_inet_checkspi(ns->netstack_stackid, protocol,
3278 		    assoc->sadb_sa_spi, NULL);
3279 		if (rcode == -1) {
3280 			return (EEXIST);
3281 		}
3282 	}
3283 
3284 	/*
3285 	 * Check to see if the new SA will be cloned AND paired. The
3286 	 * reason a SA will be cloned is the source or destination addresses
3287 	 * are not specific enough to determine if the SA goes in the outbound
3288 	 * or the inbound hash table, so its cloned and put in both. If
3289 	 * the SA is paired, it's soft linked to another SA for the other
3290 	 * direction. Keeping track and looking up SA's that are direction
3291 	 * unspecific and linked is too hard.
3292 	 */
3293 	if (clone && (pair_ext != NULL)) {
3294 		*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
3295 		return (EINVAL);
3296 	}
3297 
3298 	if (!isupdate) {
3299 		newbie = sadb_makelarvalassoc(assoc->sadb_sa_spi,
3300 		    src_addr_ptr, dst_addr_ptr, af, ns);
3301 		if (newbie == NULL)
3302 			return (ENOMEM);
3303 	}
3304 
3305 	mutex_enter(&newbie->ipsa_lock);
3306 
3307 	if (isrc != NULL) {
3308 		if (isrc->sin_family == AF_INET) {
3309 			if (srcext->sadb_address_proto != IPPROTO_ENCAP) {
3310 				if (srcext->sadb_address_proto != 0) {
3311 					/*
3312 					 * Mismatched outer-packet protocol
3313 					 * and inner-packet address family.
3314 					 */
3315 					mutex_exit(&newbie->ipsa_lock);
3316 					error = EPROTOTYPE;
3317 					*diagnostic =
3318 					    SADB_X_DIAGNOSTIC_INNER_AF_MISMATCH;
3319 					goto error;
3320 				} else {
3321 					/* Fill in with explicit protocol. */
3322 					srcext->sadb_address_proto =
3323 					    IPPROTO_ENCAP;
3324 					dstext->sadb_address_proto =
3325 					    IPPROTO_ENCAP;
3326 				}
3327 			}
3328 			isrc_addr_ptr = (uint32_t *)&isrc->sin_addr;
3329 			idst_addr_ptr = (uint32_t *)&idst->sin_addr;
3330 		} else {
3331 			ASSERT(isrc->sin_family == AF_INET6);
3332 			if (srcext->sadb_address_proto != IPPROTO_IPV6) {
3333 				if (srcext->sadb_address_proto != 0) {
3334 					/*
3335 					 * Mismatched outer-packet protocol
3336 					 * and inner-packet address family.
3337 					 */
3338 					mutex_exit(&newbie->ipsa_lock);
3339 					error = EPROTOTYPE;
3340 					*diagnostic =
3341 					    SADB_X_DIAGNOSTIC_INNER_AF_MISMATCH;
3342 					goto error;
3343 				} else {
3344 					/* Fill in with explicit protocol. */
3345 					srcext->sadb_address_proto =
3346 					    IPPROTO_IPV6;
3347 					dstext->sadb_address_proto =
3348 					    IPPROTO_IPV6;
3349 				}
3350 			}
3351 			isrc_addr_ptr = (uint32_t *)&isrc6->sin6_addr;
3352 			idst_addr_ptr = (uint32_t *)&idst6->sin6_addr;
3353 		}
3354 		newbie->ipsa_innerfam = isrc->sin_family;
3355 
3356 		IPSA_COPY_ADDR(newbie->ipsa_innersrc, isrc_addr_ptr,
3357 		    newbie->ipsa_innerfam);
3358 		IPSA_COPY_ADDR(newbie->ipsa_innerdst, idst_addr_ptr,
3359 		    newbie->ipsa_innerfam);
3360 		newbie->ipsa_innersrcpfx = isrcext->sadb_address_prefixlen;
3361 		newbie->ipsa_innerdstpfx = idstext->sadb_address_prefixlen;
3362 
3363 		/* Unique value uses inner-ports for Tunnel Mode... */
3364 		newbie->ipsa_unique_id = SA_UNIQUE_ID(isrc->sin_port,
3365 		    idst->sin_port, dstext->sadb_address_proto,
3366 		    idstext->sadb_address_proto);
3367 		newbie->ipsa_unique_mask = SA_UNIQUE_MASK(isrc->sin_port,
3368 		    idst->sin_port, dstext->sadb_address_proto,
3369 		    idstext->sadb_address_proto);
3370 	} else {
3371 		/* ... and outer-ports for Transport Mode. */
3372 		newbie->ipsa_unique_id = SA_UNIQUE_ID(src->sin_port,
3373 		    dst->sin_port, dstext->sadb_address_proto, 0);
3374 		newbie->ipsa_unique_mask = SA_UNIQUE_MASK(src->sin_port,
3375 		    dst->sin_port, dstext->sadb_address_proto, 0);
3376 	}
3377 	if (newbie->ipsa_unique_mask != (uint64_t)0)
3378 		newbie->ipsa_flags |= IPSA_F_UNIQUE;
3379 
3380 	sadb_nat_calculations(newbie,
3381 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_LOC],
3382 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_REM],
3383 	    src_addr_ptr, dst_addr_ptr);
3384 
3385 	newbie->ipsa_type = samsg->sadb_msg_satype;
3386 
3387 	ASSERT((assoc->sadb_sa_state == SADB_SASTATE_MATURE) ||
3388 	    (assoc->sadb_sa_state == SADB_X_SASTATE_ACTIVE_ELSEWHERE));
3389 	newbie->ipsa_auth_alg = assoc->sadb_sa_auth;
3390 	newbie->ipsa_encr_alg = assoc->sadb_sa_encrypt;
3391 
3392 	newbie->ipsa_flags |= assoc->sadb_sa_flags;
3393 	if (newbie->ipsa_flags & SADB_X_SAFLAGS_NATT_LOC &&
3394 	    ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_LOC] == NULL) {
3395 		mutex_exit(&newbie->ipsa_lock);
3396 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_NATT_LOC;
3397 		error = EINVAL;
3398 		goto error;
3399 	}
3400 	if (newbie->ipsa_flags & SADB_X_SAFLAGS_NATT_REM &&
3401 	    ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_REM] == NULL) {
3402 		mutex_exit(&newbie->ipsa_lock);
3403 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_NATT_REM;
3404 		error = EINVAL;
3405 		goto error;
3406 	}
3407 	if (newbie->ipsa_flags & SADB_X_SAFLAGS_TUNNEL &&
3408 	    ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_SRC] == NULL) {
3409 		mutex_exit(&newbie->ipsa_lock);
3410 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_SRC;
3411 		error = EINVAL;
3412 		goto error;
3413 	}
3414 	/*
3415 	 * If unspecified source address, force replay_wsize to 0.
3416 	 * This is because an SA that has multiple sources of secure
3417 	 * traffic cannot enforce a replay counter w/o synchronizing the
3418 	 * senders.
3419 	 */
3420 	if (ksi->ks_in_srctype != KS_IN_ADDR_UNSPEC)
3421 		newbie->ipsa_replay_wsize = assoc->sadb_sa_replay;
3422 	else
3423 		newbie->ipsa_replay_wsize = 0;
3424 
3425 	newbie->ipsa_addtime = gethrestime_sec();
3426 
3427 	if (kmcext != NULL) {
3428 		newbie->ipsa_kmp = kmcext->sadb_x_kmc_proto;
3429 		newbie->ipsa_kmc = kmcext->sadb_x_kmc_cookie;
3430 	}
3431 
3432 	/*
3433 	 * XXX CURRENT lifetime checks MAY BE needed for an UPDATE.
3434 	 * The spec says that one can update current lifetimes, but
3435 	 * that seems impractical, especially in the larval-to-mature
3436 	 * update that this function performs.
3437 	 */
3438 	if (soft != NULL) {
3439 		newbie->ipsa_softaddlt = soft->sadb_lifetime_addtime;
3440 		newbie->ipsa_softuselt = soft->sadb_lifetime_usetime;
3441 		newbie->ipsa_softbyteslt = soft->sadb_lifetime_bytes;
3442 		newbie->ipsa_softalloc = soft->sadb_lifetime_allocations;
3443 		SET_EXPIRE(newbie, softaddlt, softexpiretime);
3444 	}
3445 	if (hard != NULL) {
3446 		newbie->ipsa_hardaddlt = hard->sadb_lifetime_addtime;
3447 		newbie->ipsa_harduselt = hard->sadb_lifetime_usetime;
3448 		newbie->ipsa_hardbyteslt = hard->sadb_lifetime_bytes;
3449 		newbie->ipsa_hardalloc = hard->sadb_lifetime_allocations;
3450 		SET_EXPIRE(newbie, hardaddlt, hardexpiretime);
3451 	}
3452 	if (idle != NULL) {
3453 		newbie->ipsa_idleaddlt = idle->sadb_lifetime_addtime;
3454 		newbie->ipsa_idleuselt = idle->sadb_lifetime_usetime;
3455 		newbie->ipsa_idleexpiretime = newbie->ipsa_addtime +
3456 		    newbie->ipsa_idleaddlt;
3457 		newbie->ipsa_idletime = newbie->ipsa_idleaddlt;
3458 	}
3459 
3460 	newbie->ipsa_authtmpl = NULL;
3461 	newbie->ipsa_encrtmpl = NULL;
3462 
3463 	if (akey != NULL) {
3464 		newbie->ipsa_authkeybits = akey->sadb_key_bits;
3465 		newbie->ipsa_authkeylen = SADB_1TO8(akey->sadb_key_bits);
3466 		/* In case we have to round up to the next byte... */
3467 		if ((akey->sadb_key_bits & 0x7) != 0)
3468 			newbie->ipsa_authkeylen++;
3469 		newbie->ipsa_authkey = kmem_alloc(newbie->ipsa_authkeylen,
3470 		    KM_NOSLEEP);
3471 		if (newbie->ipsa_authkey == NULL) {
3472 			error = ENOMEM;
3473 			mutex_exit(&newbie->ipsa_lock);
3474 			goto error;
3475 		}
3476 		bcopy(akey + 1, newbie->ipsa_authkey, newbie->ipsa_authkeylen);
3477 		bzero(akey + 1, newbie->ipsa_authkeylen);
3478 
3479 		/*
3480 		 * Pre-initialize the kernel crypto framework key
3481 		 * structure.
3482 		 */
3483 		newbie->ipsa_kcfauthkey.ck_format = CRYPTO_KEY_RAW;
3484 		newbie->ipsa_kcfauthkey.ck_length = newbie->ipsa_authkeybits;
3485 		newbie->ipsa_kcfauthkey.ck_data = newbie->ipsa_authkey;
3486 
3487 		mutex_enter(&ipss->ipsec_alg_lock);
3488 		error = ipsec_create_ctx_tmpl(newbie, IPSEC_ALG_AUTH);
3489 		mutex_exit(&ipss->ipsec_alg_lock);
3490 		if (error != 0) {
3491 			mutex_exit(&newbie->ipsa_lock);
3492 			/*
3493 			 * An error here indicates that alg is the wrong type
3494 			 * (IE: not authentication) or its not in the alg tables
3495 			 * created by ipsecalgs(1m), or Kcf does not like the
3496 			 * parameters passed in with this algorithm, which is
3497 			 * probably a coding error!
3498 			 */
3499 			*diagnostic = SADB_X_DIAGNOSTIC_BAD_CTX;
3500 			goto error;
3501 		}
3502 	}
3503 
3504 	if (ekey != NULL) {
3505 		newbie->ipsa_encrkeybits = ekey->sadb_key_bits;
3506 		newbie->ipsa_encrkeylen = SADB_1TO8(ekey->sadb_key_bits);
3507 		/* In case we have to round up to the next byte... */
3508 		if ((ekey->sadb_key_bits & 0x7) != 0)
3509 			newbie->ipsa_encrkeylen++;
3510 		newbie->ipsa_encrkey = kmem_alloc(newbie->ipsa_encrkeylen,
3511 		    KM_NOSLEEP);
3512 		if (newbie->ipsa_encrkey == NULL) {
3513 			error = ENOMEM;
3514 			mutex_exit(&newbie->ipsa_lock);
3515 			goto error;
3516 		}
3517 		bcopy(ekey + 1, newbie->ipsa_encrkey, newbie->ipsa_encrkeylen);
3518 		/* XXX is this safe w.r.t db_ref, etc? */
3519 		bzero(ekey + 1, newbie->ipsa_encrkeylen);
3520 
3521 		/*
3522 		 * Pre-initialize the kernel crypto framework key
3523 		 * structure.
3524 		 */
3525 		newbie->ipsa_kcfencrkey.ck_format = CRYPTO_KEY_RAW;
3526 		newbie->ipsa_kcfencrkey.ck_length = newbie->ipsa_encrkeybits;
3527 		newbie->ipsa_kcfencrkey.ck_data = newbie->ipsa_encrkey;
3528 
3529 		mutex_enter(&ipss->ipsec_alg_lock);
3530 		error = ipsec_create_ctx_tmpl(newbie, IPSEC_ALG_ENCR);
3531 		mutex_exit(&ipss->ipsec_alg_lock);
3532 		if (error != 0) {
3533 			mutex_exit(&newbie->ipsa_lock);
3534 			/* See above for error explanation. */
3535 			*diagnostic = SADB_X_DIAGNOSTIC_BAD_CTX;
3536 			goto error;
3537 		}
3538 	}
3539 
3540 	sadb_init_alginfo(newbie);
3541 
3542 	/*
3543 	 * Ptrs to processing functions.
3544 	 */
3545 	if (newbie->ipsa_type == SADB_SATYPE_ESP)
3546 		ipsecesp_init_funcs(newbie);
3547 	else
3548 		ipsecah_init_funcs(newbie);
3549 	ASSERT(newbie->ipsa_output_func != NULL &&
3550 	    newbie->ipsa_input_func != NULL);
3551 
3552 	/*
3553 	 * Certificate ID stuff.
3554 	 */
3555 	if (ksi->ks_in_extv[SADB_EXT_IDENTITY_SRC] != NULL) {
3556 		sadb_ident_t *id =
3557 		    (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_SRC];
3558 
3559 		/*
3560 		 * Can assume strlen() will return okay because ext_check() in
3561 		 * keysock.c prepares the string for us.
3562 		 */
3563 		newbie->ipsa_src_cid = ipsid_lookup(id->sadb_ident_type,
3564 		    (char *)(id+1), ns);
3565 		if (newbie->ipsa_src_cid == NULL) {
3566 			error = ENOMEM;
3567 			mutex_exit(&newbie->ipsa_lock);
3568 			goto error;
3569 		}
3570 	}
3571 
3572 	if (ksi->ks_in_extv[SADB_EXT_IDENTITY_DST] != NULL) {
3573 		sadb_ident_t *id =
3574 		    (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_DST];
3575 
3576 		/*
3577 		 * Can assume strlen() will return okay because ext_check() in
3578 		 * keysock.c prepares the string for us.
3579 		 */
3580 		newbie->ipsa_dst_cid = ipsid_lookup(id->sadb_ident_type,
3581 		    (char *)(id+1), ns);
3582 		if (newbie->ipsa_dst_cid == NULL) {
3583 			error = ENOMEM;
3584 			mutex_exit(&newbie->ipsa_lock);
3585 			goto error;
3586 		}
3587 	}
3588 
3589 #if 0
3590 	/* XXXMLS  SENSITIVITY handling code. */
3591 	if (sens != NULL) {
3592 		int i;
3593 		uint64_t *bitmap = (uint64_t *)(sens + 1);
3594 
3595 		newbie->ipsa_dpd = sens->sadb_sens_dpd;
3596 		newbie->ipsa_senslevel = sens->sadb_sens_sens_level;
3597 		newbie->ipsa_integlevel = sens->sadb_sens_integ_level;
3598 		newbie->ipsa_senslen = SADB_64TO8(sens->sadb_sens_sens_len);
3599 		newbie->ipsa_integlen = SADB_64TO8(sens->sadb_sens_integ_len);
3600 		newbie->ipsa_integ = kmem_alloc(newbie->ipsa_integlen,
3601 		    KM_NOSLEEP);
3602 		if (newbie->ipsa_integ == NULL) {
3603 			error = ENOMEM;
3604 			mutex_exit(&newbie->ipsa_lock);
3605 			goto error;
3606 		}
3607 		newbie->ipsa_sens = kmem_alloc(newbie->ipsa_senslen,
3608 		    KM_NOSLEEP);
3609 		if (newbie->ipsa_sens == NULL) {
3610 			error = ENOMEM;
3611 			mutex_exit(&newbie->ipsa_lock);
3612 			goto error;
3613 		}
3614 		for (i = 0; i < sens->sadb_sens_sens_len; i++) {
3615 			newbie->ipsa_sens[i] = *bitmap;
3616 			bitmap++;
3617 		}
3618 		for (i = 0; i < sens->sadb_sens_integ_len; i++) {
3619 			newbie->ipsa_integ[i] = *bitmap;
3620 			bitmap++;
3621 		}
3622 	}
3623 
3624 #endif
3625 
3626 	if (replayext != NULL) {
3627 		if ((replayext->sadb_x_rc_replay32 == 0) &&
3628 		    (replayext->sadb_x_rc_replay64 != 0)) {
3629 			error = EOPNOTSUPP;
3630 			*diagnostic = SADB_X_DIAGNOSTIC_INVALID_REPLAY;
3631 			mutex_exit(&newbie->ipsa_lock);
3632 			goto error;
3633 		}
3634 		newbie->ipsa_replay = replayext->sadb_x_rc_replay32;
3635 	}
3636 
3637 	/* now that the SA has been updated, set its new state */
3638 	newbie->ipsa_state = assoc->sadb_sa_state;
3639 
3640 	if (clone) {
3641 		newbie->ipsa_haspeer = B_TRUE;
3642 	} else {
3643 		if (!is_inbound) {
3644 			lifetime_fuzz(newbie);
3645 		}
3646 	}
3647 	/*
3648 	 * The less locks I hold when doing an insertion and possible cloning,
3649 	 * the better!
3650 	 */
3651 	mutex_exit(&newbie->ipsa_lock);
3652 
3653 	if (clone) {
3654 		newbie_clone = sadb_cloneassoc(newbie);
3655 
3656 		if (newbie_clone == NULL) {
3657 			error = ENOMEM;
3658 			goto error;
3659 		}
3660 	}
3661 
3662 	/*
3663 	 * Enter the bucket locks.  The order of entry is outbound,
3664 	 * inbound.  We map "primary" and "secondary" into outbound and inbound
3665 	 * based on the destination address type.  If the destination address
3666 	 * type is for a node that isn't mine (or potentially mine), the
3667 	 * "primary" bucket is the outbound one.
3668 	 */
3669 	if (!is_inbound) {
3670 		/* primary == outbound */
3671 		mutex_enter(&primary->isaf_lock);
3672 		mutex_enter(&secondary->isaf_lock);
3673 	} else {
3674 		/* primary == inbound */
3675 		mutex_enter(&secondary->isaf_lock);
3676 		mutex_enter(&primary->isaf_lock);
3677 	}
3678 
3679 	IPSECHW_DEBUG(IPSECHW_SADB, ("sadb_common_add: spi = 0x%x\n",
3680 	    newbie->ipsa_spi));
3681 
3682 	/*
3683 	 * sadb_insertassoc() doesn't increment the reference
3684 	 * count.  We therefore have to increment the
3685 	 * reference count one more time to reflect the
3686 	 * pointers of the table that reference this SA.
3687 	 */
3688 	IPSA_REFHOLD(newbie);
3689 
3690 	if (isupdate) {
3691 		/*
3692 		 * Unlink from larval holding cell in the "inbound" fanout.
3693 		 */
3694 		ASSERT(newbie->ipsa_linklock == &primary->isaf_lock ||
3695 		    newbie->ipsa_linklock == &secondary->isaf_lock);
3696 		sadb_unlinkassoc(newbie);
3697 	}
3698 
3699 	mutex_enter(&newbie->ipsa_lock);
3700 	error = sadb_insertassoc(newbie, primary);
3701 	if (error == 0) {
3702 		ctl_mp = sadb_fmt_sa_req(DL_CO_SET, newbie->ipsa_type, newbie,
3703 		    is_inbound);
3704 	}
3705 	mutex_exit(&newbie->ipsa_lock);
3706 
3707 	if (error != 0) {
3708 		/*
3709 		 * Since sadb_insertassoc() failed, we must decrement the
3710 		 * refcount again so the cleanup code will actually free
3711 		 * the offending SA.
3712 		 */
3713 		IPSA_REFRELE(newbie);
3714 		goto error_unlock;
3715 	}
3716 
3717 	if (newbie_clone != NULL) {
3718 		mutex_enter(&newbie_clone->ipsa_lock);
3719 		error = sadb_insertassoc(newbie_clone, secondary);
3720 		mutex_exit(&newbie_clone->ipsa_lock);
3721 		if (error != 0) {
3722 			/* Collision in secondary table. */
3723 			sadb_unlinkassoc(newbie);  /* This does REFRELE. */
3724 			goto error_unlock;
3725 		}
3726 		IPSA_REFHOLD(newbie_clone);
3727 	} else {
3728 		ASSERT(primary != secondary);
3729 		scratch = ipsec_getassocbyspi(secondary, newbie->ipsa_spi,
3730 		    ALL_ZEROES_PTR, newbie->ipsa_dstaddr, af);
3731 		if (scratch != NULL) {
3732 			/* Collision in secondary table. */
3733 			sadb_unlinkassoc(newbie);  /* This does REFRELE. */
3734 			/* Set the error, since ipsec_getassocbyspi() can't. */
3735 			error = EEXIST;
3736 			goto error_unlock;
3737 		}
3738 	}
3739 
3740 	/* OKAY!  So let's do some reality check assertions. */
3741 
3742 	ASSERT(MUTEX_NOT_HELD(&newbie->ipsa_lock));
3743 	ASSERT(newbie_clone == NULL ||
3744 	    (MUTEX_NOT_HELD(&newbie_clone->ipsa_lock)));
3745 	/*
3746 	 * If hardware acceleration could happen, send it.
3747 	 */
3748 	if (ctl_mp != NULL) {
3749 		putnext(ip_q, ctl_mp);
3750 		ctl_mp = NULL;
3751 	}
3752 
3753 error_unlock:
3754 
3755 	/*
3756 	 * We can exit the locks in any order.	Only entrance needs to
3757 	 * follow any protocol.
3758 	 */
3759 	mutex_exit(&secondary->isaf_lock);
3760 	mutex_exit(&primary->isaf_lock);
3761 
3762 	if (pair_ext != NULL && error == 0) {
3763 		/* update pair_spi if it exists. */
3764 		ipsapp = get_ipsa_pair(assoc, srcext, dstext, spp);
3765 		if (ipsapp == NULL) {
3766 			error = ESRCH;
3767 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_SA_NOTFOUND;
3768 		} else if (ipsapp->ipsap_psa_ptr != NULL) {
3769 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_ALREADY;
3770 			error = EINVAL;
3771 		} else {
3772 			/* update_pairing() sets diagnostic */
3773 			error = update_pairing(ipsapp, ksi, diagnostic, spp);
3774 		}
3775 	}
3776 	/* Common error point for this routine. */
3777 error:
3778 	if (newbie != NULL) {
3779 		if (error != 0) {
3780 			/* This SA is broken, let the reaper clean up. */
3781 			mutex_enter(&newbie->ipsa_lock);
3782 			newbie->ipsa_state = IPSA_STATE_DEAD;
3783 			newbie->ipsa_hardexpiretime = 1;
3784 			mutex_exit(&newbie->ipsa_lock);
3785 		}
3786 		IPSA_REFRELE(newbie);
3787 	}
3788 	if (newbie_clone != NULL) {
3789 		IPSA_REFRELE(newbie_clone);
3790 	}
3791 	if (ctl_mp != NULL)
3792 		freemsg(ctl_mp);
3793 
3794 	if (error == 0) {
3795 		/*
3796 		 * Construct favorable PF_KEY return message and send to
3797 		 * keysock. Update the flags in the original keysock message
3798 		 * to reflect the actual flags in the new SA.
3799 		 *  (Q:  Do I need to pass "newbie"?  If I do,
3800 		 * make sure to REFHOLD, call, then REFRELE.)
3801 		 */
3802 		assoc->sadb_sa_flags = newbie->ipsa_flags;
3803 		sadb_pfkey_echo(pfkey_q, mp, samsg, ksi, NULL);
3804 	}
3805 
3806 	destroy_ipsa_pair(ipsapp);
3807 	return (error);
3808 }
3809 
3810 /*
3811  * Set the time of first use for a security association.  Update any
3812  * expiration times as a result.
3813  */
3814 void
3815 sadb_set_usetime(ipsa_t *assoc)
3816 {
3817 	time_t snapshot = gethrestime_sec();
3818 
3819 	mutex_enter(&assoc->ipsa_lock);
3820 	assoc->ipsa_lastuse = snapshot;
3821 	assoc->ipsa_idleexpiretime = snapshot + assoc->ipsa_idletime;
3822 
3823 	/*
3824 	 * Caller does check usetime before calling me usually, and
3825 	 * double-checking is better than a mutex_enter/exit hit.
3826 	 */
3827 	if (assoc->ipsa_usetime == 0) {
3828 		/*
3829 		 * This is redundant for outbound SA's, as
3830 		 * ipsec_getassocbyconn() sets the IPSA_F_USED flag already.
3831 		 * Inbound SAs, however, have no such protection.
3832 		 */
3833 		assoc->ipsa_flags |= IPSA_F_USED;
3834 		assoc->ipsa_usetime = snapshot;
3835 
3836 		/*
3837 		 * After setting the use time, see if we have a use lifetime
3838 		 * that would cause the actual SA expiration time to shorten.
3839 		 */
3840 		UPDATE_EXPIRE(assoc, softuselt, softexpiretime);
3841 		UPDATE_EXPIRE(assoc, harduselt, hardexpiretime);
3842 	}
3843 	mutex_exit(&assoc->ipsa_lock);
3844 }
3845 
3846 /*
3847  * Send up a PF_KEY expire message for this association.
3848  */
3849 static void
3850 sadb_expire_assoc(queue_t *pfkey_q, ipsa_t *assoc)
3851 {
3852 	mblk_t *mp, *mp1;
3853 	int alloclen, af;
3854 	sadb_msg_t *samsg;
3855 	sadb_lifetime_t *current, *expire;
3856 	sadb_sa_t *saext;
3857 	uint8_t *end;
3858 	boolean_t tunnel_mode;
3859 
3860 	ASSERT(MUTEX_HELD(&assoc->ipsa_lock));
3861 
3862 	/* Don't bother sending if there's no queue. */
3863 	if (pfkey_q == NULL)
3864 		return;
3865 
3866 	/* If the SA is one of a pair, only SOFT expire the OUTBOUND SA */
3867 	if (assoc->ipsa_state == IPSA_STATE_DYING &&
3868 	    (assoc->ipsa_flags & IPSA_F_PAIRED) &&
3869 	    !(assoc->ipsa_flags & IPSA_F_OUTBOUND)) {
3870 		return;
3871 	}
3872 
3873 	mp = sadb_keysock_out(0);
3874 	if (mp == NULL) {
3875 		/* cmn_err(CE_WARN, */
3876 		/*	"sadb_expire_assoc: Can't allocate KEYSOCK_OUT.\n"); */
3877 		return;
3878 	}
3879 
3880 	alloclen = sizeof (*samsg) + sizeof (*current) + sizeof (*expire) +
3881 	    2 * sizeof (sadb_address_t) + sizeof (*saext);
3882 
3883 	af = assoc->ipsa_addrfam;
3884 	switch (af) {
3885 	case AF_INET:
3886 		alloclen += 2 * sizeof (struct sockaddr_in);
3887 		break;
3888 	case AF_INET6:
3889 		alloclen += 2 * sizeof (struct sockaddr_in6);
3890 		break;
3891 	default:
3892 		/* Won't happen unless there's a kernel bug. */
3893 		freeb(mp);
3894 		cmn_err(CE_WARN,
3895 		    "sadb_expire_assoc: Unknown address length.\n");
3896 		return;
3897 	}
3898 
3899 	tunnel_mode = (assoc->ipsa_flags & IPSA_F_TUNNEL);
3900 	if (tunnel_mode) {
3901 		alloclen += 2 * sizeof (sadb_address_t);
3902 		switch (assoc->ipsa_innerfam) {
3903 		case AF_INET:
3904 			alloclen += 2 * sizeof (struct sockaddr_in);
3905 			break;
3906 		case AF_INET6:
3907 			alloclen += 2 * sizeof (struct sockaddr_in6);
3908 			break;
3909 		default:
3910 			/* Won't happen unless there's a kernel bug. */
3911 			freeb(mp);
3912 			cmn_err(CE_WARN, "sadb_expire_assoc: "
3913 			    "Unknown inner address length.\n");
3914 			return;
3915 		}
3916 	}
3917 
3918 	mp->b_cont = allocb(alloclen, BPRI_HI);
3919 	if (mp->b_cont == NULL) {
3920 		freeb(mp);
3921 		/* cmn_err(CE_WARN, */
3922 		/*	"sadb_expire_assoc: Can't allocate message.\n"); */
3923 		return;
3924 	}
3925 
3926 	mp1 = mp;
3927 	mp = mp->b_cont;
3928 	end = mp->b_wptr + alloclen;
3929 
3930 	samsg = (sadb_msg_t *)mp->b_wptr;
3931 	mp->b_wptr += sizeof (*samsg);
3932 	samsg->sadb_msg_version = PF_KEY_V2;
3933 	samsg->sadb_msg_type = SADB_EXPIRE;
3934 	samsg->sadb_msg_errno = 0;
3935 	samsg->sadb_msg_satype = assoc->ipsa_type;
3936 	samsg->sadb_msg_len = SADB_8TO64(alloclen);
3937 	samsg->sadb_msg_reserved = 0;
3938 	samsg->sadb_msg_seq = 0;
3939 	samsg->sadb_msg_pid = 0;
3940 
3941 	saext = (sadb_sa_t *)mp->b_wptr;
3942 	mp->b_wptr += sizeof (*saext);
3943 	saext->sadb_sa_len = SADB_8TO64(sizeof (*saext));
3944 	saext->sadb_sa_exttype = SADB_EXT_SA;
3945 	saext->sadb_sa_spi = assoc->ipsa_spi;
3946 	saext->sadb_sa_replay = assoc->ipsa_replay_wsize;
3947 	saext->sadb_sa_state = assoc->ipsa_state;
3948 	saext->sadb_sa_auth = assoc->ipsa_auth_alg;
3949 	saext->sadb_sa_encrypt = assoc->ipsa_encr_alg;
3950 	saext->sadb_sa_flags = assoc->ipsa_flags;
3951 
3952 	current = (sadb_lifetime_t *)mp->b_wptr;
3953 	mp->b_wptr += sizeof (sadb_lifetime_t);
3954 	current->sadb_lifetime_len = SADB_8TO64(sizeof (*current));
3955 	current->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT;
3956 	/* We do not support the concept. */
3957 	current->sadb_lifetime_allocations = 0;
3958 	current->sadb_lifetime_bytes = assoc->ipsa_bytes;
3959 	current->sadb_lifetime_addtime = assoc->ipsa_addtime;
3960 	current->sadb_lifetime_usetime = assoc->ipsa_usetime;
3961 
3962 	expire = (sadb_lifetime_t *)mp->b_wptr;
3963 	mp->b_wptr += sizeof (*expire);
3964 	expire->sadb_lifetime_len = SADB_8TO64(sizeof (*expire));
3965 
3966 	if (assoc->ipsa_state == IPSA_STATE_DEAD) {
3967 		expire->sadb_lifetime_exttype = SADB_EXT_LIFETIME_HARD;
3968 		expire->sadb_lifetime_allocations = assoc->ipsa_hardalloc;
3969 		expire->sadb_lifetime_bytes = assoc->ipsa_hardbyteslt;
3970 		expire->sadb_lifetime_addtime = assoc->ipsa_hardaddlt;
3971 		expire->sadb_lifetime_usetime = assoc->ipsa_harduselt;
3972 	} else if (assoc->ipsa_state == IPSA_STATE_DYING) {
3973 		expire->sadb_lifetime_exttype = SADB_EXT_LIFETIME_SOFT;
3974 		expire->sadb_lifetime_allocations = assoc->ipsa_softalloc;
3975 		expire->sadb_lifetime_bytes = assoc->ipsa_softbyteslt;
3976 		expire->sadb_lifetime_addtime = assoc->ipsa_softaddlt;
3977 		expire->sadb_lifetime_usetime = assoc->ipsa_softuselt;
3978 	} else {
3979 		ASSERT(assoc->ipsa_state == IPSA_STATE_MATURE);
3980 		expire->sadb_lifetime_exttype = SADB_X_EXT_LIFETIME_IDLE;
3981 		expire->sadb_lifetime_allocations = 0;
3982 		expire->sadb_lifetime_bytes = 0;
3983 		expire->sadb_lifetime_addtime = assoc->ipsa_idleaddlt;
3984 		expire->sadb_lifetime_usetime = assoc->ipsa_idleuselt;
3985 	}
3986 
3987 	mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end, SADB_EXT_ADDRESS_SRC,
3988 	    af, assoc->ipsa_srcaddr, tunnel_mode ? 0 : SA_SRCPORT(assoc),
3989 	    SA_PROTO(assoc), 0);
3990 	ASSERT(mp->b_wptr != NULL);
3991 
3992 	mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end, SADB_EXT_ADDRESS_DST,
3993 	    af, assoc->ipsa_dstaddr, tunnel_mode ? 0 : SA_DSTPORT(assoc),
3994 	    SA_PROTO(assoc), 0);
3995 	ASSERT(mp->b_wptr != NULL);
3996 
3997 	if (tunnel_mode) {
3998 		mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end,
3999 		    SADB_X_EXT_ADDRESS_INNER_SRC, assoc->ipsa_innerfam,
4000 		    assoc->ipsa_innersrc, SA_SRCPORT(assoc), SA_IPROTO(assoc),
4001 		    assoc->ipsa_innersrcpfx);
4002 		ASSERT(mp->b_wptr != NULL);
4003 		mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end,
4004 		    SADB_X_EXT_ADDRESS_INNER_DST, assoc->ipsa_innerfam,
4005 		    assoc->ipsa_innerdst, SA_DSTPORT(assoc), SA_IPROTO(assoc),
4006 		    assoc->ipsa_innerdstpfx);
4007 		ASSERT(mp->b_wptr != NULL);
4008 	}
4009 
4010 	/* Can just putnext, we're ready to go! */
4011 	putnext(pfkey_q, mp1);
4012 }
4013 
4014 /*
4015  * "Age" the SA with the number of bytes that was used to protect traffic.
4016  * Send an SADB_EXPIRE message if appropriate.	Return B_TRUE if there was
4017  * enough "charge" left in the SA to protect the data.	Return B_FALSE
4018  * otherwise.  (If B_FALSE is returned, the association either was, or became
4019  * DEAD.)
4020  */
4021 boolean_t
4022 sadb_age_bytes(queue_t *pfkey_q, ipsa_t *assoc, uint64_t bytes,
4023     boolean_t sendmsg)
4024 {
4025 	boolean_t rc = B_TRUE;
4026 	uint64_t newtotal;
4027 
4028 	mutex_enter(&assoc->ipsa_lock);
4029 	newtotal = assoc->ipsa_bytes + bytes;
4030 	if (assoc->ipsa_hardbyteslt != 0 &&
4031 	    newtotal >= assoc->ipsa_hardbyteslt) {
4032 		if (assoc->ipsa_state != IPSA_STATE_DEAD) {
4033 			sadb_delete_cluster(assoc);
4034 			/*
4035 			 * Send EXPIRE message to PF_KEY.  May wish to pawn
4036 			 * this off on another non-interrupt thread.  Also
4037 			 * unlink this SA immediately.
4038 			 */
4039 			assoc->ipsa_state = IPSA_STATE_DEAD;
4040 			if (sendmsg)
4041 				sadb_expire_assoc(pfkey_q, assoc);
4042 			/*
4043 			 * Set non-zero expiration time so sadb_age_assoc()
4044 			 * will work when reaping.
4045 			 */
4046 			assoc->ipsa_hardexpiretime = (time_t)1;
4047 		} /* Else someone beat me to it! */
4048 		rc = B_FALSE;
4049 	} else if (assoc->ipsa_softbyteslt != 0 &&
4050 	    (newtotal >= assoc->ipsa_softbyteslt)) {
4051 		if (assoc->ipsa_state < IPSA_STATE_DYING) {
4052 			/*
4053 			 * Send EXPIRE message to PF_KEY.  May wish to pawn
4054 			 * this off on another non-interrupt thread.
4055 			 */
4056 			assoc->ipsa_state = IPSA_STATE_DYING;
4057 			assoc->ipsa_bytes = newtotal;
4058 			if (sendmsg)
4059 				sadb_expire_assoc(pfkey_q, assoc);
4060 		} /* Else someone beat me to it! */
4061 	}
4062 	if (rc == B_TRUE)
4063 		assoc->ipsa_bytes = newtotal;
4064 	mutex_exit(&assoc->ipsa_lock);
4065 	return (rc);
4066 }
4067 
4068 /*
4069  * Push one or more DL_CO_DELETE messages queued up by
4070  * sadb_torch_assoc down to the underlying driver now that it's a
4071  * convenient time for it (i.e., ipsa bucket locks not held).
4072  */
4073 static void
4074 sadb_drain_torchq(queue_t *q, mblk_t *mp)
4075 {
4076 	while (mp != NULL) {
4077 		mblk_t *next = mp->b_next;
4078 		mp->b_next = NULL;
4079 		if (q != NULL)
4080 			putnext(q, mp);
4081 		else
4082 			freemsg(mp);
4083 		mp = next;
4084 	}
4085 }
4086 
4087 /*
4088  * "Torch" an individual SA.  Returns NULL, so it can be tail-called from
4089  *     sadb_age_assoc().
4090  *
4091  * If SA is hardware-accelerated, and we can't allocate the mblk
4092  * containing the DL_CO_DELETE, just return; it will remain in the
4093  * table and be swept up by sadb_ager() in a subsequent pass.
4094  */
4095 static ipsa_t *
4096 sadb_torch_assoc(isaf_t *head, ipsa_t *sa, boolean_t inbnd, mblk_t **mq)
4097 {
4098 	mblk_t *mp;
4099 
4100 	ASSERT(MUTEX_HELD(&head->isaf_lock));
4101 	ASSERT(MUTEX_HELD(&sa->ipsa_lock));
4102 	ASSERT(sa->ipsa_state == IPSA_STATE_DEAD);
4103 
4104 	/*
4105 	 * Force cached SAs to be revalidated..
4106 	 */
4107 	head->isaf_gen++;
4108 
4109 	if (sa->ipsa_flags & IPSA_F_HW) {
4110 		mp = sadb_fmt_sa_req(DL_CO_DELETE, sa->ipsa_type, sa, inbnd);
4111 		if (mp == NULL) {
4112 			mutex_exit(&sa->ipsa_lock);
4113 			return (NULL);
4114 		}
4115 		mp->b_next = *mq;
4116 		*mq = mp;
4117 	}
4118 	mutex_exit(&sa->ipsa_lock);
4119 	sadb_unlinkassoc(sa);
4120 
4121 	return (NULL);
4122 }
4123 
4124 /*
4125  * Do various SA-is-idle activities depending on delta (the number of idle
4126  * seconds on the SA) and/or other properties of the SA.
4127  *
4128  * Return B_TRUE if I've sent a packet, because I have to drop the
4129  * association's mutex before sending a packet out the wire.
4130  */
4131 /* ARGSUSED */
4132 static boolean_t
4133 sadb_idle_activities(ipsa_t *assoc, time_t delta, boolean_t inbound)
4134 {
4135 	ipsecesp_stack_t *espstack = assoc->ipsa_netstack->netstack_ipsecesp;
4136 	int nat_t_interval = espstack->ipsecesp_nat_keepalive_interval;
4137 
4138 	ASSERT(MUTEX_HELD(&assoc->ipsa_lock));
4139 
4140 	if (!inbound && (assoc->ipsa_flags & IPSA_F_NATT_LOC) &&
4141 	    delta >= nat_t_interval &&
4142 	    gethrestime_sec() - assoc->ipsa_last_nat_t_ka >= nat_t_interval) {
4143 		ASSERT(assoc->ipsa_type == SADB_SATYPE_ESP);
4144 		assoc->ipsa_last_nat_t_ka = gethrestime_sec();
4145 		mutex_exit(&assoc->ipsa_lock);
4146 		ipsecesp_send_keepalive(assoc);
4147 		return (B_TRUE);
4148 	}
4149 	return (B_FALSE);
4150 }
4151 
4152 /*
4153  * Return "assoc" if haspeer is true and I send an expire.  This allows
4154  * the consumers' aging functions to tidy up an expired SA's peer.
4155  */
4156 static ipsa_t *
4157 sadb_age_assoc(isaf_t *head, queue_t *pfkey_q, ipsa_t *assoc,
4158     time_t current, int reap_delay, boolean_t inbound, mblk_t **mq)
4159 {
4160 	ipsa_t *retval = NULL;
4161 	boolean_t dropped_mutex = B_FALSE;
4162 
4163 	ASSERT(MUTEX_HELD(&head->isaf_lock));
4164 
4165 	mutex_enter(&assoc->ipsa_lock);
4166 
4167 	if (((assoc->ipsa_state == IPSA_STATE_LARVAL) ||
4168 	    ((assoc->ipsa_state == IPSA_STATE_IDLE) ||
4169 	    (assoc->ipsa_state == IPSA_STATE_ACTIVE_ELSEWHERE) &&
4170 	    (assoc->ipsa_hardexpiretime != 0))) &&
4171 	    (assoc->ipsa_hardexpiretime <= current)) {
4172 		assoc->ipsa_state = IPSA_STATE_DEAD;
4173 		return (sadb_torch_assoc(head, assoc, inbound, mq));
4174 	}
4175 
4176 	/*
4177 	 * Check lifetimes.  Fortunately, SA setup is done
4178 	 * such that there are only two times to look at,
4179 	 * softexpiretime, and hardexpiretime.
4180 	 *
4181 	 * Check hard first.
4182 	 */
4183 
4184 	if (assoc->ipsa_hardexpiretime != 0 &&
4185 	    assoc->ipsa_hardexpiretime <= current) {
4186 		if (assoc->ipsa_state == IPSA_STATE_DEAD)
4187 			return (sadb_torch_assoc(head, assoc, inbound, mq));
4188 
4189 		if (inbound) {
4190 			sadb_delete_cluster(assoc);
4191 		}
4192 
4193 		/*
4194 		 * Send SADB_EXPIRE with hard lifetime, delay for unlinking.
4195 		 */
4196 		assoc->ipsa_state = IPSA_STATE_DEAD;
4197 		if (assoc->ipsa_haspeer || assoc->ipsa_otherspi != 0) {
4198 			/*
4199 			 * If the SA is paired or peered with another, put
4200 			 * a copy on a list which can be processed later, the
4201 			 * pair/peer SA needs to be updated so the both die
4202 			 * at the same time.
4203 			 *
4204 			 * If I return assoc, I have to bump up its reference
4205 			 * count to keep with the ipsa_t reference count
4206 			 * semantics.
4207 			 */
4208 			IPSA_REFHOLD(assoc);
4209 			retval = assoc;
4210 		}
4211 		sadb_expire_assoc(pfkey_q, assoc);
4212 		assoc->ipsa_hardexpiretime = current + reap_delay;
4213 	} else if (assoc->ipsa_softexpiretime != 0 &&
4214 	    assoc->ipsa_softexpiretime <= current &&
4215 	    assoc->ipsa_state < IPSA_STATE_DYING) {
4216 		/*
4217 		 * Send EXPIRE message to PF_KEY.  May wish to pawn
4218 		 * this off on another non-interrupt thread.
4219 		 */
4220 		assoc->ipsa_state = IPSA_STATE_DYING;
4221 		if (assoc->ipsa_haspeer) {
4222 			/*
4223 			 * If the SA has a peer, update the peer's state
4224 			 * on SOFT_EXPIRE, this is mostly to prevent two
4225 			 * expire messages from effectively the same SA.
4226 			 *
4227 			 * Don't care about paired SA's, then can (and should)
4228 			 * be able to soft expire at different times.
4229 			 *
4230 			 * If I return assoc, I have to bump up its
4231 			 * reference count to keep with the ipsa_t reference
4232 			 * count semantics.
4233 			 */
4234 			IPSA_REFHOLD(assoc);
4235 			retval = assoc;
4236 		}
4237 		sadb_expire_assoc(pfkey_q, assoc);
4238 	} else if (assoc->ipsa_idletime != 0 &&
4239 	    assoc->ipsa_idleexpiretime <= current) {
4240 		if (assoc->ipsa_state == IPSA_STATE_ACTIVE_ELSEWHERE) {
4241 			assoc->ipsa_state = IPSA_STATE_IDLE;
4242 		}
4243 
4244 		/*
4245 		 * Need to handle Mature case
4246 		 */
4247 		if (assoc->ipsa_state == IPSA_STATE_MATURE) {
4248 			sadb_expire_assoc(pfkey_q, assoc);
4249 		}
4250 	} else {
4251 		/* Check idle time activities. */
4252 		dropped_mutex = sadb_idle_activities(assoc,
4253 		    current - assoc->ipsa_lastuse, inbound);
4254 	}
4255 
4256 	if (!dropped_mutex)
4257 		mutex_exit(&assoc->ipsa_lock);
4258 	return (retval);
4259 }
4260 
4261 /*
4262  * Called by a consumer protocol to do ther dirty work of reaping dead
4263  * Security Associations.
4264  *
4265  * NOTE: sadb_age_assoc() marks expired SA's as DEAD but only removed
4266  * SA's that are already marked DEAD, so expired SA's are only reaped
4267  * the second time sadb_ager() runs.
4268  */
4269 void
4270 sadb_ager(sadb_t *sp, queue_t *pfkey_q, queue_t *ip_q, int reap_delay,
4271     netstack_t *ns)
4272 {
4273 	int i;
4274 	isaf_t *bucket;
4275 	ipsa_t *assoc, *spare;
4276 	iacqf_t *acqlist;
4277 	ipsacq_t *acqrec, *spareacq;
4278 	templist_t *haspeerlist, *newbie;
4279 	/* Snapshot current time now. */
4280 	time_t current = gethrestime_sec();
4281 	mblk_t *mq = NULL;
4282 	haspeerlist = NULL;
4283 
4284 	/*
4285 	 * Do my dirty work.  This includes aging real entries, aging
4286 	 * larvals, and aging outstanding ACQUIREs.
4287 	 *
4288 	 * I hope I don't tie up resources for too long.
4289 	 */
4290 
4291 	/* Age acquires. */
4292 
4293 	for (i = 0; i < sp->sdb_hashsize; i++) {
4294 		acqlist = &sp->sdb_acq[i];
4295 		mutex_enter(&acqlist->iacqf_lock);
4296 		for (acqrec = acqlist->iacqf_ipsacq; acqrec != NULL;
4297 		    acqrec = spareacq) {
4298 			spareacq = acqrec->ipsacq_next;
4299 			if (current > acqrec->ipsacq_expire)
4300 				sadb_destroy_acquire(acqrec, ns);
4301 		}
4302 		mutex_exit(&acqlist->iacqf_lock);
4303 	}
4304 
4305 	/* Age inbound associations. */
4306 	for (i = 0; i < sp->sdb_hashsize; i++) {
4307 		bucket = &(sp->sdb_if[i]);
4308 		mutex_enter(&bucket->isaf_lock);
4309 		for (assoc = bucket->isaf_ipsa; assoc != NULL;
4310 		    assoc = spare) {
4311 			spare = assoc->ipsa_next;
4312 			if (sadb_age_assoc(bucket, pfkey_q, assoc, current,
4313 			    reap_delay, B_TRUE, &mq) != NULL) {
4314 				/*
4315 				 * Put SA's which have a peer or SA's which
4316 				 * are paired on a list for processing after
4317 				 * all the hash tables have been walked.
4318 				 *
4319 				 * sadb_age_assoc() increments the refcnt,
4320 				 * effectively doing an IPSA_REFHOLD().
4321 				 */
4322 				newbie = kmem_alloc(sizeof (*newbie),
4323 				    KM_NOSLEEP);
4324 				if (newbie == NULL) {
4325 					/*
4326 					 * Don't forget to REFRELE().
4327 					 */
4328 					IPSA_REFRELE(assoc);
4329 					continue;	/* for loop... */
4330 				}
4331 				newbie->next = haspeerlist;
4332 				newbie->ipsa = assoc;
4333 				haspeerlist = newbie;
4334 			}
4335 		}
4336 		mutex_exit(&bucket->isaf_lock);
4337 	}
4338 
4339 	if (mq != NULL) {
4340 		sadb_drain_torchq(ip_q, mq);
4341 		mq = NULL;
4342 	}
4343 	age_pair_peer_list(haspeerlist, sp, B_FALSE);
4344 	haspeerlist = NULL;
4345 
4346 	/* Age outbound associations. */
4347 	for (i = 0; i < sp->sdb_hashsize; i++) {
4348 		bucket = &(sp->sdb_of[i]);
4349 		mutex_enter(&bucket->isaf_lock);
4350 		for (assoc = bucket->isaf_ipsa; assoc != NULL;
4351 		    assoc = spare) {
4352 			spare = assoc->ipsa_next;
4353 			if (sadb_age_assoc(bucket, pfkey_q, assoc, current,
4354 			    reap_delay, B_FALSE, &mq) != NULL) {
4355 				/*
4356 				 * sadb_age_assoc() increments the refcnt,
4357 				 * effectively doing an IPSA_REFHOLD().
4358 				 */
4359 				newbie = kmem_alloc(sizeof (*newbie),
4360 				    KM_NOSLEEP);
4361 				if (newbie == NULL) {
4362 					/*
4363 					 * Don't forget to REFRELE().
4364 					 */
4365 					IPSA_REFRELE(assoc);
4366 					continue;	/* for loop... */
4367 				}
4368 				newbie->next = haspeerlist;
4369 				newbie->ipsa = assoc;
4370 				haspeerlist = newbie;
4371 			}
4372 		}
4373 		mutex_exit(&bucket->isaf_lock);
4374 	}
4375 	if (mq != NULL) {
4376 		sadb_drain_torchq(ip_q, mq);
4377 		mq = NULL;
4378 	}
4379 
4380 	age_pair_peer_list(haspeerlist, sp, B_TRUE);
4381 
4382 	/*
4383 	 * Run a GC pass to clean out dead identities.
4384 	 */
4385 	ipsid_gc(ns);
4386 }
4387 
4388 /*
4389  * Figure out when to reschedule the ager.
4390  */
4391 timeout_id_t
4392 sadb_retimeout(hrtime_t begin, queue_t *pfkey_q, void (*ager)(void *),
4393     void *agerarg, uint_t *intp, uint_t intmax, short mid)
4394 {
4395 	hrtime_t end = gethrtime();
4396 	uint_t interval = *intp;
4397 
4398 	/*
4399 	 * See how long this took.  If it took too long, increase the
4400 	 * aging interval.
4401 	 */
4402 	if ((end - begin) > (hrtime_t)interval * (hrtime_t)1000000) {
4403 		if (interval >= intmax) {
4404 			/* XXX Rate limit this?  Or recommend flush? */
4405 			(void) strlog(mid, 0, 0, SL_ERROR | SL_WARN,
4406 			    "Too many SA's to age out in %d msec.\n",
4407 			    intmax);
4408 		} else {
4409 			/* Double by shifting by one bit. */
4410 			interval <<= 1;
4411 			interval = min(interval, intmax);
4412 		}
4413 	} else if ((end - begin) <= (hrtime_t)interval * (hrtime_t)500000 &&
4414 	    interval > SADB_AGE_INTERVAL_DEFAULT) {
4415 		/*
4416 		 * If I took less than half of the interval, then I should
4417 		 * ratchet the interval back down.  Never automatically
4418 		 * shift below the default aging interval.
4419 		 *
4420 		 * NOTE:This even overrides manual setting of the age
4421 		 *	interval using NDD to lower the setting past the
4422 		 *	default.  In other words, if you set the interval
4423 		 *	lower than the default, and your SADB gets too big,
4424 		 *	the interval will only self-lower back to the default.
4425 		 */
4426 		/* Halve by shifting one bit. */
4427 		interval >>= 1;
4428 		interval = max(interval, SADB_AGE_INTERVAL_DEFAULT);
4429 	}
4430 	*intp = interval;
4431 	return (qtimeout(pfkey_q, ager, agerarg,
4432 	    drv_usectohz(interval * 1000)));
4433 }
4434 
4435 
4436 /*
4437  * Update the lifetime values of an SA.	 This is the path an SADB_UPDATE
4438  * message takes when updating a MATURE or DYING SA.
4439  */
4440 static void
4441 sadb_update_lifetimes(ipsa_t *assoc, sadb_lifetime_t *hard,
4442     sadb_lifetime_t *soft, sadb_lifetime_t *idle, boolean_t outbound)
4443 {
4444 	mutex_enter(&assoc->ipsa_lock);
4445 
4446 	/*
4447 	 * XXX RFC 2367 mentions how an SADB_EXT_LIFETIME_CURRENT can be
4448 	 * passed in during an update message.	We currently don't handle
4449 	 * these.
4450 	 */
4451 
4452 	if (hard != NULL) {
4453 		if (hard->sadb_lifetime_bytes != 0)
4454 			assoc->ipsa_hardbyteslt = hard->sadb_lifetime_bytes;
4455 		if (hard->sadb_lifetime_usetime != 0)
4456 			assoc->ipsa_harduselt = hard->sadb_lifetime_usetime;
4457 		if (hard->sadb_lifetime_addtime != 0)
4458 			assoc->ipsa_hardaddlt = hard->sadb_lifetime_addtime;
4459 		if (assoc->ipsa_hardaddlt != 0) {
4460 			assoc->ipsa_hardexpiretime =
4461 			    assoc->ipsa_addtime + assoc->ipsa_hardaddlt;
4462 		}
4463 		if (assoc->ipsa_harduselt != 0 &&
4464 		    assoc->ipsa_flags & IPSA_F_USED) {
4465 			UPDATE_EXPIRE(assoc, harduselt, hardexpiretime);
4466 		}
4467 		if (hard->sadb_lifetime_allocations != 0)
4468 			assoc->ipsa_hardalloc = hard->sadb_lifetime_allocations;
4469 	}
4470 
4471 	if (soft != NULL) {
4472 		if (soft->sadb_lifetime_bytes != 0) {
4473 			if (soft->sadb_lifetime_bytes >
4474 			    assoc->ipsa_hardbyteslt) {
4475 				assoc->ipsa_softbyteslt =
4476 				    assoc->ipsa_hardbyteslt;
4477 			} else {
4478 				assoc->ipsa_softbyteslt =
4479 				    soft->sadb_lifetime_bytes;
4480 			}
4481 		}
4482 		if (soft->sadb_lifetime_usetime != 0) {
4483 			if (soft->sadb_lifetime_usetime >
4484 			    assoc->ipsa_harduselt) {
4485 				assoc->ipsa_softuselt =
4486 				    assoc->ipsa_harduselt;
4487 			} else {
4488 				assoc->ipsa_softuselt =
4489 				    soft->sadb_lifetime_usetime;
4490 			}
4491 		}
4492 		if (soft->sadb_lifetime_addtime != 0) {
4493 			if (soft->sadb_lifetime_addtime >
4494 			    assoc->ipsa_hardexpiretime) {
4495 				assoc->ipsa_softexpiretime =
4496 				    assoc->ipsa_hardexpiretime;
4497 			} else {
4498 				assoc->ipsa_softaddlt =
4499 				    soft->sadb_lifetime_addtime;
4500 			}
4501 		}
4502 		if (assoc->ipsa_softaddlt != 0) {
4503 			assoc->ipsa_softexpiretime =
4504 			    assoc->ipsa_addtime + assoc->ipsa_softaddlt;
4505 		}
4506 		if (assoc->ipsa_softuselt != 0 &&
4507 		    assoc->ipsa_flags & IPSA_F_USED) {
4508 			UPDATE_EXPIRE(assoc, softuselt, softexpiretime);
4509 		}
4510 		if (outbound && assoc->ipsa_softexpiretime != 0) {
4511 			if (assoc->ipsa_state == IPSA_STATE_MATURE)
4512 				lifetime_fuzz(assoc);
4513 		}
4514 
4515 		if (soft->sadb_lifetime_allocations != 0)
4516 			assoc->ipsa_softalloc = soft->sadb_lifetime_allocations;
4517 	}
4518 
4519 	if (idle != NULL) {
4520 		time_t current = gethrestime_sec();
4521 		if ((assoc->ipsa_idleexpiretime <= current) &&
4522 		    (assoc->ipsa_idleaddlt == idle->sadb_lifetime_addtime)) {
4523 			assoc->ipsa_idleexpiretime =
4524 			    current + assoc->ipsa_idleaddlt;
4525 		}
4526 		if (idle->sadb_lifetime_addtime != 0)
4527 			assoc->ipsa_idleaddlt = idle->sadb_lifetime_addtime;
4528 		if (idle->sadb_lifetime_usetime != 0)
4529 			assoc->ipsa_idleuselt = idle->sadb_lifetime_usetime;
4530 		if (assoc->ipsa_idleaddlt != 0) {
4531 			assoc->ipsa_idleexpiretime =
4532 			    current + idle->sadb_lifetime_addtime;
4533 			assoc->ipsa_idletime = idle->sadb_lifetime_addtime;
4534 		}
4535 		if (assoc->ipsa_idleuselt != 0) {
4536 			if (assoc->ipsa_idletime != 0) {
4537 				assoc->ipsa_idletime = min(assoc->ipsa_idletime,
4538 				    assoc->ipsa_idleuselt);
4539 			assoc->ipsa_idleexpiretime =
4540 			    current + assoc->ipsa_idletime;
4541 			} else {
4542 				assoc->ipsa_idleexpiretime =
4543 				    current + assoc->ipsa_idleuselt;
4544 				assoc->ipsa_idletime = assoc->ipsa_idleuselt;
4545 			}
4546 		}
4547 	}
4548 	mutex_exit(&assoc->ipsa_lock);
4549 }
4550 
4551 static int
4552 sadb_update_state(ipsa_t *assoc, uint_t new_state, mblk_t **ipkt_lst)
4553 {
4554 	int rcode = 0;
4555 	time_t current = gethrestime_sec();
4556 
4557 	mutex_enter(&assoc->ipsa_lock);
4558 
4559 	switch (new_state) {
4560 	case SADB_X_SASTATE_ACTIVE_ELSEWHERE:
4561 		if (assoc->ipsa_state == SADB_X_SASTATE_IDLE) {
4562 			assoc->ipsa_state = IPSA_STATE_ACTIVE_ELSEWHERE;
4563 			assoc->ipsa_idleexpiretime =
4564 			    current + assoc->ipsa_idletime;
4565 		}
4566 		break;
4567 	case SADB_X_SASTATE_IDLE:
4568 		if (assoc->ipsa_state == SADB_X_SASTATE_ACTIVE_ELSEWHERE) {
4569 			assoc->ipsa_state = IPSA_STATE_IDLE;
4570 			assoc->ipsa_idleexpiretime =
4571 			    current + assoc->ipsa_idletime;
4572 		} else {
4573 			rcode = EINVAL;
4574 		}
4575 		break;
4576 
4577 	case SADB_X_SASTATE_ACTIVE:
4578 		if (assoc->ipsa_state != SADB_X_SASTATE_IDLE) {
4579 			rcode = EINVAL;
4580 			break;
4581 		}
4582 		assoc->ipsa_state = IPSA_STATE_MATURE;
4583 		assoc->ipsa_idleexpiretime = current + assoc->ipsa_idletime;
4584 
4585 		if (ipkt_lst == NULL) {
4586 			break;
4587 		}
4588 
4589 		if (assoc->ipsa_bpkt_head != NULL) {
4590 			*ipkt_lst = assoc->ipsa_bpkt_head;
4591 			assoc->ipsa_bpkt_head = assoc->ipsa_bpkt_tail = NULL;
4592 			assoc->ipsa_mblkcnt = 0;
4593 		} else {
4594 			*ipkt_lst = NULL;
4595 		}
4596 		break;
4597 	default:
4598 		rcode = EINVAL;
4599 		break;
4600 	}
4601 
4602 	mutex_exit(&assoc->ipsa_lock);
4603 	return (rcode);
4604 }
4605 
4606 /*
4607  * Common code to update an SA.
4608  */
4609 
4610 int
4611 sadb_update_sa(mblk_t *mp, keysock_in_t *ksi, mblk_t **ipkt_lst,
4612     sadbp_t *spp, int *diagnostic, queue_t *pfkey_q,
4613     int (*add_sa_func)(mblk_t *, keysock_in_t *, int *, netstack_t *),
4614     netstack_t *ns, uint8_t sadb_msg_type)
4615 {
4616 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
4617 	sadb_address_t *srcext =
4618 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
4619 	sadb_address_t *dstext =
4620 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
4621 	sadb_x_kmc_t *kmcext =
4622 	    (sadb_x_kmc_t *)ksi->ks_in_extv[SADB_X_EXT_KM_COOKIE];
4623 	sadb_key_t *akey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_AUTH];
4624 	sadb_key_t *ekey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT];
4625 	sadb_x_replay_ctr_t *replext =
4626 	    (sadb_x_replay_ctr_t *)ksi->ks_in_extv[SADB_X_EXT_REPLAY_VALUE];
4627 	sadb_lifetime_t *soft =
4628 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_SOFT];
4629 	sadb_lifetime_t *hard =
4630 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_HARD];
4631 	sadb_lifetime_t *idle =
4632 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_X_EXT_LIFETIME_IDLE];
4633 	sadb_x_pair_t *pair_ext =
4634 	    (sadb_x_pair_t *)ksi->ks_in_extv[SADB_X_EXT_PAIR];
4635 	ipsa_t *echo_target = NULL;
4636 	int error = 0;
4637 	ipsap_t *ipsapp = NULL;
4638 	uint32_t kmp = 0, kmc = 0;
4639 	time_t current = gethrestime_sec();
4640 
4641 
4642 	/* I need certain extensions present for either UPDATE message. */
4643 	if (srcext == NULL) {
4644 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
4645 		return (EINVAL);
4646 	}
4647 	if (dstext == NULL) {
4648 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
4649 		return (EINVAL);
4650 	}
4651 	if (assoc == NULL) {
4652 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
4653 		return (EINVAL);
4654 	}
4655 
4656 	if (kmcext != NULL) {
4657 		kmp = kmcext->sadb_x_kmc_proto;
4658 		kmc = kmcext->sadb_x_kmc_cookie;
4659 	}
4660 
4661 	ipsapp = get_ipsa_pair(assoc, srcext, dstext, spp);
4662 	if (ipsapp == NULL) {
4663 		*diagnostic = SADB_X_DIAGNOSTIC_SA_NOTFOUND;
4664 		return (ESRCH);
4665 	}
4666 
4667 	if (ipsapp->ipsap_psa_ptr == NULL && ipsapp->ipsap_sa_ptr != NULL) {
4668 		if (ipsapp->ipsap_sa_ptr->ipsa_state == IPSA_STATE_LARVAL) {
4669 			/*
4670 			 * REFRELE the target and let the add_sa_func()
4671 			 * deal with updating a larval SA.
4672 			 */
4673 			destroy_ipsa_pair(ipsapp);
4674 			return (add_sa_func(mp, ksi, diagnostic, ns));
4675 		}
4676 	}
4677 
4678 	/*
4679 	 * At this point we have an UPDATE to a MATURE SA. There should
4680 	 * not be any keying material present.
4681 	 */
4682 	if (akey != NULL) {
4683 		*diagnostic = SADB_X_DIAGNOSTIC_AKEY_PRESENT;
4684 		error = EINVAL;
4685 		goto bail;
4686 	}
4687 	if (ekey != NULL) {
4688 		*diagnostic = SADB_X_DIAGNOSTIC_EKEY_PRESENT;
4689 		error = EINVAL;
4690 		goto bail;
4691 	}
4692 
4693 	if (assoc->sadb_sa_state == SADB_X_SASTATE_ACTIVE_ELSEWHERE) {
4694 		if (ipsapp->ipsap_sa_ptr != NULL &&
4695 		    ipsapp->ipsap_sa_ptr->ipsa_state == IPSA_STATE_IDLE) {
4696 			if ((error = sadb_update_state(ipsapp->ipsap_sa_ptr,
4697 			    assoc->sadb_sa_state, NULL)) != 0) {
4698 				*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4699 				goto bail;
4700 			}
4701 		}
4702 		if (ipsapp->ipsap_psa_ptr != NULL &&
4703 		    ipsapp->ipsap_psa_ptr->ipsa_state == IPSA_STATE_IDLE) {
4704 			if ((error = sadb_update_state(ipsapp->ipsap_psa_ptr,
4705 			    assoc->sadb_sa_state, NULL)) != 0) {
4706 				*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4707 				goto bail;
4708 			}
4709 		}
4710 	}
4711 	if (assoc->sadb_sa_state == SADB_X_SASTATE_ACTIVE) {
4712 		if (ipsapp->ipsap_sa_ptr != NULL) {
4713 			error = sadb_update_state(ipsapp->ipsap_sa_ptr,
4714 			    assoc->sadb_sa_state,
4715 			    (ipsapp->ipsap_sa_ptr->ipsa_flags &
4716 			    IPSA_F_INBOUND) ? ipkt_lst : NULL);
4717 			if (error) {
4718 				*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4719 				goto bail;
4720 			}
4721 		}
4722 		if (ipsapp->ipsap_psa_ptr != NULL) {
4723 			error = sadb_update_state(ipsapp->ipsap_psa_ptr,
4724 			    assoc->sadb_sa_state,
4725 			    (ipsapp->ipsap_psa_ptr->ipsa_flags &
4726 			    IPSA_F_INBOUND) ? ipkt_lst : NULL);
4727 			if (error) {
4728 				*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4729 				goto bail;
4730 			}
4731 		}
4732 		sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)mp->b_cont->b_rptr,
4733 		    ksi, echo_target);
4734 		goto bail;
4735 	}
4736 
4737 	/*
4738 	 * Reality checks for updates of active associations.
4739 	 * Sundry first-pass UPDATE-specific reality checks.
4740 	 * Have to do the checks here, because it's after the add_sa code.
4741 	 * XXX STATS : logging/stats here?
4742 	 */
4743 
4744 	if (!((assoc->sadb_sa_state == SADB_SASTATE_MATURE) ||
4745 	    (assoc->sadb_sa_state == SADB_X_SASTATE_ACTIVE_ELSEWHERE))) {
4746 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4747 		error = EINVAL;
4748 		goto bail;
4749 	}
4750 
4751 	if (assoc->sadb_sa_flags & ~spp->s_updateflags) {
4752 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_SAFLAGS;
4753 		error = EINVAL;
4754 		goto bail;
4755 	}
4756 
4757 	if (ksi->ks_in_extv[SADB_EXT_LIFETIME_CURRENT] != NULL) {
4758 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_LIFETIME;
4759 		error = EOPNOTSUPP;
4760 		goto bail;
4761 	}
4762 
4763 	if ((*diagnostic = sadb_hardsoftchk(hard, soft, idle)) != 0) {
4764 		error = EINVAL;
4765 		goto bail;
4766 	}
4767 
4768 	if (ipsapp->ipsap_sa_ptr != NULL) {
4769 		if (ipsapp->ipsap_sa_ptr->ipsa_state == IPSA_STATE_DEAD) {
4770 			error = ESRCH;	/* DEAD == Not there, in this case. */
4771 			*diagnostic = SADB_X_DIAGNOSTIC_SA_EXPIRED;
4772 			goto bail;
4773 		}
4774 		if ((kmp != 0) &&
4775 		    ((ipsapp->ipsap_sa_ptr->ipsa_kmp != 0) ||
4776 		    (ipsapp->ipsap_sa_ptr->ipsa_kmp != kmp))) {
4777 			*diagnostic = SADB_X_DIAGNOSTIC_DUPLICATE_KMP;
4778 			error = EINVAL;
4779 			goto bail;
4780 		}
4781 		if ((kmc != 0) &&
4782 		    ((ipsapp->ipsap_sa_ptr->ipsa_kmc != 0) ||
4783 		    (ipsapp->ipsap_sa_ptr->ipsa_kmc != kmc))) {
4784 			*diagnostic = SADB_X_DIAGNOSTIC_DUPLICATE_KMC;
4785 			error = EINVAL;
4786 			goto bail;
4787 		}
4788 		/*
4789 		 * Do not allow replay value change for MATURE or LARVAL SA.
4790 		 */
4791 
4792 		if ((replext != NULL) &&
4793 		    ((ipsapp->ipsap_sa_ptr->ipsa_state == IPSA_STATE_LARVAL) ||
4794 		    (ipsapp->ipsap_sa_ptr->ipsa_state == IPSA_STATE_MATURE))) {
4795 			*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4796 			error = EINVAL;
4797 			goto bail;
4798 		}
4799 	}
4800 
4801 	if (ipsapp->ipsap_psa_ptr != NULL) {
4802 		if (ipsapp->ipsap_psa_ptr->ipsa_state == IPSA_STATE_DEAD) {
4803 			*diagnostic = SADB_X_DIAGNOSTIC_SA_EXPIRED;
4804 			error = ESRCH;	/* DEAD == Not there, in this case. */
4805 			goto bail;
4806 		}
4807 		if ((kmp != 0) &&
4808 		    ((ipsapp->ipsap_psa_ptr->ipsa_kmp != 0) ||
4809 		    (ipsapp->ipsap_psa_ptr->ipsa_kmp != kmp))) {
4810 			*diagnostic = SADB_X_DIAGNOSTIC_DUPLICATE_KMP;
4811 			error = EINVAL;
4812 			goto bail;
4813 		}
4814 		if ((kmc != 0) &&
4815 		    ((ipsapp->ipsap_psa_ptr->ipsa_kmc != 0) ||
4816 		    (ipsapp->ipsap_psa_ptr->ipsa_kmc != kmc))) {
4817 			*diagnostic = SADB_X_DIAGNOSTIC_DUPLICATE_KMC;
4818 			error = EINVAL;
4819 			goto bail;
4820 		}
4821 	}
4822 
4823 	if (ipsapp->ipsap_sa_ptr != NULL) {
4824 		sadb_update_lifetimes(ipsapp->ipsap_sa_ptr, hard, soft,
4825 		    idle, B_TRUE);
4826 		if (kmp != 0)
4827 			ipsapp->ipsap_sa_ptr->ipsa_kmp = kmp;
4828 		if (kmc != 0)
4829 			ipsapp->ipsap_sa_ptr->ipsa_kmc = kmc;
4830 		if ((replext != NULL) &&
4831 		    (ipsapp->ipsap_sa_ptr->ipsa_replay_wsize != 0)) {
4832 			/*
4833 			 * If an inbound SA, update the replay counter
4834 			 * and check off all the other sequence number
4835 			 */
4836 			if (ksi->ks_in_dsttype == KS_IN_ADDR_ME) {
4837 				if (!sadb_replay_check(ipsapp->ipsap_sa_ptr,
4838 				    replext->sadb_x_rc_replay32)) {
4839 					*diagnostic =
4840 					    SADB_X_DIAGNOSTIC_INVALID_REPLAY;
4841 					error = EINVAL;
4842 					goto bail;
4843 				}
4844 				mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4845 				ipsapp->ipsap_sa_ptr->ipsa_idleexpiretime =
4846 				    current +
4847 				    ipsapp->ipsap_sa_ptr->ipsa_idletime;
4848 				mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4849 			} else {
4850 				mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4851 				ipsapp->ipsap_sa_ptr->ipsa_replay =
4852 				    replext->sadb_x_rc_replay32;
4853 				ipsapp->ipsap_sa_ptr->ipsa_idleexpiretime =
4854 				    current +
4855 				    ipsapp->ipsap_sa_ptr->ipsa_idletime;
4856 				mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4857 			}
4858 		}
4859 	}
4860 
4861 	if (sadb_msg_type == SADB_X_UPDATEPAIR) {
4862 		if (ipsapp->ipsap_psa_ptr != NULL) {
4863 			sadb_update_lifetimes(ipsapp->ipsap_psa_ptr, hard, soft,
4864 			    idle, B_FALSE);
4865 			if (kmp != 0)
4866 				ipsapp->ipsap_psa_ptr->ipsa_kmp = kmp;
4867 			if (kmc != 0)
4868 				ipsapp->ipsap_psa_ptr->ipsa_kmc = kmc;
4869 		} else {
4870 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_SA_NOTFOUND;
4871 			error = ESRCH;
4872 			goto bail;
4873 		}
4874 	}
4875 
4876 	if (pair_ext != NULL)
4877 		error = update_pairing(ipsapp, ksi, diagnostic, spp);
4878 
4879 	if (error == 0)
4880 		sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)mp->b_cont->b_rptr,
4881 		    ksi, echo_target);
4882 bail:
4883 
4884 	destroy_ipsa_pair(ipsapp);
4885 
4886 	return (error);
4887 }
4888 
4889 
4890 int
4891 update_pairing(ipsap_t *ipsapp, keysock_in_t *ksi, int *diagnostic,
4892     sadbp_t *spp)
4893 {
4894 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
4895 	sadb_address_t *srcext =
4896 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
4897 	sadb_address_t *dstext =
4898 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
4899 	sadb_x_pair_t *pair_ext =
4900 	    (sadb_x_pair_t *)ksi->ks_in_extv[SADB_X_EXT_PAIR];
4901 	int error = 0;
4902 	ipsap_t *oipsapp = NULL;
4903 	boolean_t undo_pair = B_FALSE;
4904 	uint32_t ipsa_flags;
4905 
4906 	if (pair_ext->sadb_x_pair_spi == 0 || pair_ext->sadb_x_pair_spi ==
4907 	    assoc->sadb_sa_spi) {
4908 		*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
4909 		return (EINVAL);
4910 	}
4911 
4912 	/*
4913 	 * Assume for now that the spi value provided in the SADB_UPDATE
4914 	 * message was valid, update the SA with its pair spi value.
4915 	 * If the spi turns out to be bogus or the SA no longer exists
4916 	 * then this will be detected when the reverse update is made
4917 	 * below.
4918 	 */
4919 	mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4920 	ipsapp->ipsap_sa_ptr->ipsa_flags |= IPSA_F_PAIRED;
4921 	ipsapp->ipsap_sa_ptr->ipsa_otherspi = pair_ext->sadb_x_pair_spi;
4922 	mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4923 
4924 	/*
4925 	 * After updating the ipsa_otherspi element of the SA, get_ipsa_pair()
4926 	 * should now return pointers to the SA *AND* its pair, if this is not
4927 	 * the case, the "otherspi" either did not exist or was deleted. Also
4928 	 * check that "otherspi" is not already paired. If everything looks
4929 	 * good, complete the update. IPSA_REFRELE the first pair_pointer
4930 	 * after this update to ensure its not deleted until we are done.
4931 	 */
4932 	oipsapp = get_ipsa_pair(assoc, srcext, dstext, spp);
4933 	if (oipsapp == NULL) {
4934 		/*
4935 		 * This should never happen, calling function still has
4936 		 * IPSA_REFHELD on the SA we just updated.
4937 		 */
4938 		*diagnostic = SADB_X_DIAGNOSTIC_PAIR_SA_NOTFOUND;
4939 		return (EINVAL);
4940 	}
4941 
4942 	if (oipsapp->ipsap_psa_ptr == NULL) {
4943 		*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
4944 		error = EINVAL;
4945 		undo_pair = B_TRUE;
4946 	} else {
4947 		ipsa_flags = oipsapp->ipsap_psa_ptr->ipsa_flags;
4948 		if ((oipsapp->ipsap_psa_ptr->ipsa_state == IPSA_STATE_DEAD) ||
4949 		    (oipsapp->ipsap_psa_ptr->ipsa_state == IPSA_STATE_DYING)) {
4950 			/* Its dead Jim! */
4951 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
4952 			undo_pair = B_TRUE;
4953 		} else if ((ipsa_flags & (IPSA_F_OUTBOUND | IPSA_F_INBOUND)) ==
4954 		    (IPSA_F_OUTBOUND | IPSA_F_INBOUND)) {
4955 			/* This SA is in both hashtables. */
4956 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
4957 			undo_pair = B_TRUE;
4958 		} else if (ipsa_flags & IPSA_F_PAIRED) {
4959 			/* This SA is already paired with another. */
4960 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_ALREADY;
4961 			undo_pair = B_TRUE;
4962 		}
4963 	}
4964 
4965 	if (undo_pair) {
4966 		/* The pair SA does not exist. */
4967 		mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4968 		ipsapp->ipsap_sa_ptr->ipsa_flags &= ~IPSA_F_PAIRED;
4969 		ipsapp->ipsap_sa_ptr->ipsa_otherspi = 0;
4970 		mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4971 	} else {
4972 		mutex_enter(&oipsapp->ipsap_psa_ptr->ipsa_lock);
4973 		oipsapp->ipsap_psa_ptr->ipsa_otherspi = assoc->sadb_sa_spi;
4974 		oipsapp->ipsap_psa_ptr->ipsa_flags |= IPSA_F_PAIRED;
4975 		mutex_exit(&oipsapp->ipsap_psa_ptr->ipsa_lock);
4976 	}
4977 
4978 	destroy_ipsa_pair(oipsapp);
4979 	return (error);
4980 }
4981 
4982 /*
4983  * The following functions deal with ACQUIRE LISTS.  An ACQUIRE list is
4984  * a list of outstanding SADB_ACQUIRE messages.	 If ipsec_getassocbyconn() fails
4985  * for an outbound datagram, that datagram is queued up on an ACQUIRE record,
4986  * and an SADB_ACQUIRE message is sent up.  Presumably, a user-space key
4987  * management daemon will process the ACQUIRE, use a SADB_GETSPI to reserve
4988  * an SPI value and a larval SA, then SADB_UPDATE the larval SA, and ADD the
4989  * other direction's SA.
4990  */
4991 
4992 /*
4993  * Check the ACQUIRE lists.  If there's an existing ACQUIRE record,
4994  * grab it, lock it, and return it.  Otherwise return NULL.
4995  */
4996 static ipsacq_t *
4997 sadb_checkacquire(iacqf_t *bucket, ipsec_action_t *ap, ipsec_policy_t *pp,
4998     uint32_t *src, uint32_t *dst, uint32_t *isrc, uint32_t *idst,
4999     uint64_t unique_id)
5000 {
5001 	ipsacq_t *walker;
5002 	sa_family_t fam;
5003 	uint32_t blank_address[4] = {0, 0, 0, 0};
5004 
5005 	if (isrc == NULL) {
5006 		ASSERT(idst == NULL);
5007 		isrc = idst = blank_address;
5008 	}
5009 
5010 	/*
5011 	 * Scan list for duplicates.  Check for UNIQUE, src/dest, policy.
5012 	 *
5013 	 * XXX May need search for duplicates based on other things too!
5014 	 */
5015 	for (walker = bucket->iacqf_ipsacq; walker != NULL;
5016 	    walker = walker->ipsacq_next) {
5017 		mutex_enter(&walker->ipsacq_lock);
5018 		fam = walker->ipsacq_addrfam;
5019 		if (IPSA_ARE_ADDR_EQUAL(dst, walker->ipsacq_dstaddr, fam) &&
5020 		    IPSA_ARE_ADDR_EQUAL(src, walker->ipsacq_srcaddr, fam) &&
5021 		    ip_addr_match((uint8_t *)isrc, walker->ipsacq_innersrcpfx,
5022 		    (in6_addr_t *)walker->ipsacq_innersrc) &&
5023 		    ip_addr_match((uint8_t *)idst, walker->ipsacq_innerdstpfx,
5024 		    (in6_addr_t *)walker->ipsacq_innerdst) &&
5025 		    (ap == walker->ipsacq_act) &&
5026 		    (pp == walker->ipsacq_policy) &&
5027 		    /* XXX do deep compares of ap/pp? */
5028 		    (unique_id == walker->ipsacq_unique_id))
5029 			break;			/* everything matched */
5030 		mutex_exit(&walker->ipsacq_lock);
5031 	}
5032 
5033 	return (walker);
5034 }
5035 
5036 /*
5037  * For this mblk, insert a new acquire record.  Assume bucket contains addrs
5038  * of all of the same length.  Give up (and drop) if memory
5039  * cannot be allocated for a new one; otherwise, invoke callback to
5040  * send the acquire up..
5041  *
5042  * In cases where we need both AH and ESP, add the SA to the ESP ACQUIRE
5043  * list.  The ah_add_sa_finish() routines can look at the packet's ipsec_out_t
5044  * and handle this case specially.
5045  */
5046 void
5047 sadb_acquire(mblk_t *mp, ipsec_out_t *io, boolean_t need_ah, boolean_t need_esp)
5048 {
5049 	sadbp_t *spp;
5050 	sadb_t *sp;
5051 	ipsacq_t *newbie;
5052 	iacqf_t *bucket;
5053 	mblk_t *datamp = mp->b_cont;
5054 	mblk_t *extended;
5055 	ipha_t *ipha = (ipha_t *)datamp->b_rptr;
5056 	ip6_t *ip6h = (ip6_t *)datamp->b_rptr;
5057 	uint32_t *src, *dst, *isrc, *idst;
5058 	ipsec_policy_t *pp = io->ipsec_out_policy;
5059 	ipsec_action_t *ap = io->ipsec_out_act;
5060 	sa_family_t af;
5061 	int hashoffset;
5062 	uint32_t seq;
5063 	uint64_t unique_id = 0;
5064 	ipsec_selector_t sel;
5065 	boolean_t tunnel_mode = io->ipsec_out_tunnel;
5066 	netstack_t	*ns = io->ipsec_out_ns;
5067 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
5068 
5069 	ASSERT((pp != NULL) || (ap != NULL));
5070 
5071 	ASSERT(need_ah != NULL || need_esp != NULL);
5072 	/* Assign sadb pointers */
5073 	if (need_esp) { /* ESP for AH+ESP */
5074 		ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
5075 
5076 		spp = &espstack->esp_sadb;
5077 	} else {
5078 		ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
5079 
5080 		spp = &ahstack->ah_sadb;
5081 	}
5082 	sp = io->ipsec_out_v4 ? &spp->s_v4 : &spp->s_v6;
5083 
5084 	if (ap == NULL)
5085 		ap = pp->ipsp_act;
5086 
5087 	ASSERT(ap != NULL);
5088 
5089 	if (ap->ipa_act.ipa_apply.ipp_use_unique || tunnel_mode)
5090 		unique_id = SA_FORM_UNIQUE_ID(io);
5091 
5092 	/*
5093 	 * Set up an ACQUIRE record.
5094 	 *
5095 	 * Immediately, make sure the ACQUIRE sequence number doesn't slip
5096 	 * below the lowest point allowed in the kernel.  (In other words,
5097 	 * make sure the high bit on the sequence number is set.)
5098 	 */
5099 
5100 	seq = keysock_next_seq(ns) | IACQF_LOWEST_SEQ;
5101 
5102 	if (IPH_HDR_VERSION(ipha) == IP_VERSION) {
5103 		src = (uint32_t *)&ipha->ipha_src;
5104 		dst = (uint32_t *)&ipha->ipha_dst;
5105 		af = AF_INET;
5106 		hashoffset = OUTBOUND_HASH_V4(sp, ipha->ipha_dst);
5107 		ASSERT(io->ipsec_out_v4 == B_TRUE);
5108 	} else {
5109 		ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION);
5110 		src = (uint32_t *)&ip6h->ip6_src;
5111 		dst = (uint32_t *)&ip6h->ip6_dst;
5112 		af = AF_INET6;
5113 		hashoffset = OUTBOUND_HASH_V6(sp, ip6h->ip6_dst);
5114 		ASSERT(io->ipsec_out_v4 == B_FALSE);
5115 	}
5116 
5117 	if (tunnel_mode) {
5118 		if (pp == NULL) {
5119 			/*
5120 			 * Tunnel mode with no policy pointer means this is a
5121 			 * reflected ICMP (like a ECHO REQUEST) that came in
5122 			 * with self-encapsulated protection.  Until we better
5123 			 * support this, drop the packet.
5124 			 */
5125 			ip_drop_packet(mp, B_FALSE, NULL, NULL,
5126 			    DROPPER(ipss, ipds_spd_got_selfencap),
5127 			    &ipss->ipsec_spd_dropper);
5128 			return;
5129 		}
5130 		/* Snag inner addresses. */
5131 		isrc = io->ipsec_out_insrc;
5132 		idst = io->ipsec_out_indst;
5133 	} else {
5134 		isrc = idst = NULL;
5135 	}
5136 
5137 	/*
5138 	 * Check buckets to see if there is an existing entry.  If so,
5139 	 * grab it.  sadb_checkacquire locks newbie if found.
5140 	 */
5141 	bucket = &(sp->sdb_acq[hashoffset]);
5142 	mutex_enter(&bucket->iacqf_lock);
5143 	newbie = sadb_checkacquire(bucket, ap, pp, src, dst, isrc, idst,
5144 	    unique_id);
5145 
5146 	if (newbie == NULL) {
5147 		/*
5148 		 * Otherwise, allocate a new one.
5149 		 */
5150 		newbie = kmem_zalloc(sizeof (*newbie), KM_NOSLEEP);
5151 		if (newbie == NULL) {
5152 			mutex_exit(&bucket->iacqf_lock);
5153 			ip_drop_packet(mp, B_FALSE, NULL, NULL,
5154 			    DROPPER(ipss, ipds_sadb_acquire_nomem),
5155 			    &ipss->ipsec_sadb_dropper);
5156 			return;
5157 		}
5158 		newbie->ipsacq_policy = pp;
5159 		if (pp != NULL) {
5160 			IPPOL_REFHOLD(pp);
5161 		}
5162 		IPACT_REFHOLD(ap);
5163 		newbie->ipsacq_act = ap;
5164 		newbie->ipsacq_linklock = &bucket->iacqf_lock;
5165 		newbie->ipsacq_next = bucket->iacqf_ipsacq;
5166 		newbie->ipsacq_ptpn = &bucket->iacqf_ipsacq;
5167 		if (newbie->ipsacq_next != NULL)
5168 			newbie->ipsacq_next->ipsacq_ptpn = &newbie->ipsacq_next;
5169 		bucket->iacqf_ipsacq = newbie;
5170 		mutex_init(&newbie->ipsacq_lock, NULL, MUTEX_DEFAULT, NULL);
5171 		mutex_enter(&newbie->ipsacq_lock);
5172 	}
5173 
5174 	mutex_exit(&bucket->iacqf_lock);
5175 
5176 	/*
5177 	 * This assert looks silly for now, but we may need to enter newbie's
5178 	 * mutex during a search.
5179 	 */
5180 	ASSERT(MUTEX_HELD(&newbie->ipsacq_lock));
5181 
5182 	mp->b_next = NULL;
5183 	/* Queue up packet.  Use b_next. */
5184 	if (newbie->ipsacq_numpackets == 0) {
5185 		/* First one. */
5186 		newbie->ipsacq_mp = mp;
5187 		newbie->ipsacq_numpackets = 1;
5188 		newbie->ipsacq_expire = gethrestime_sec();
5189 		/*
5190 		 * Extended ACQUIRE with both AH+ESP will use ESP's timeout
5191 		 * value.
5192 		 */
5193 		newbie->ipsacq_expire += *spp->s_acquire_timeout;
5194 		newbie->ipsacq_seq = seq;
5195 		newbie->ipsacq_addrfam = af;
5196 
5197 		newbie->ipsacq_srcport = io->ipsec_out_src_port;
5198 		newbie->ipsacq_dstport = io->ipsec_out_dst_port;
5199 		newbie->ipsacq_icmp_type = io->ipsec_out_icmp_type;
5200 		newbie->ipsacq_icmp_code = io->ipsec_out_icmp_code;
5201 		if (tunnel_mode) {
5202 			newbie->ipsacq_inneraddrfam = io->ipsec_out_inaf;
5203 			newbie->ipsacq_proto = io->ipsec_out_inaf == AF_INET6 ?
5204 			    IPPROTO_IPV6 : IPPROTO_ENCAP;
5205 			newbie->ipsacq_innersrcpfx = io->ipsec_out_insrcpfx;
5206 			newbie->ipsacq_innerdstpfx = io->ipsec_out_indstpfx;
5207 			IPSA_COPY_ADDR(newbie->ipsacq_innersrc,
5208 			    io->ipsec_out_insrc, io->ipsec_out_inaf);
5209 			IPSA_COPY_ADDR(newbie->ipsacq_innerdst,
5210 			    io->ipsec_out_indst, io->ipsec_out_inaf);
5211 		} else {
5212 			newbie->ipsacq_proto = io->ipsec_out_proto;
5213 		}
5214 		newbie->ipsacq_unique_id = unique_id;
5215 	} else {
5216 		/* Scan to the end of the list & insert. */
5217 		mblk_t *lastone = newbie->ipsacq_mp;
5218 
5219 		while (lastone->b_next != NULL)
5220 			lastone = lastone->b_next;
5221 		lastone->b_next = mp;
5222 		if (newbie->ipsacq_numpackets++ == ipsacq_maxpackets) {
5223 			newbie->ipsacq_numpackets = ipsacq_maxpackets;
5224 			lastone = newbie->ipsacq_mp;
5225 			newbie->ipsacq_mp = lastone->b_next;
5226 			lastone->b_next = NULL;
5227 			ip_drop_packet(lastone, B_FALSE, NULL, NULL,
5228 			    DROPPER(ipss, ipds_sadb_acquire_toofull),
5229 			    &ipss->ipsec_sadb_dropper);
5230 		} else {
5231 			IP_ACQUIRE_STAT(ipss, qhiwater,
5232 			    newbie->ipsacq_numpackets);
5233 		}
5234 	}
5235 
5236 	/*
5237 	 * Reset addresses.  Set them to the most recently added mblk chain,
5238 	 * so that the address pointers in the acquire record will point
5239 	 * at an mblk still attached to the acquire list.
5240 	 */
5241 
5242 	newbie->ipsacq_srcaddr = src;
5243 	newbie->ipsacq_dstaddr = dst;
5244 
5245 	/*
5246 	 * If the acquire record has more than one queued packet, we've
5247 	 * already sent an ACQUIRE, and don't need to repeat ourself.
5248 	 */
5249 	if (newbie->ipsacq_seq != seq || newbie->ipsacq_numpackets > 1) {
5250 		/* I have an acquire outstanding already! */
5251 		mutex_exit(&newbie->ipsacq_lock);
5252 		return;
5253 	}
5254 
5255 	if (keysock_extended_reg(ns)) {
5256 		/*
5257 		 * Construct an extended ACQUIRE.  There are logging
5258 		 * opportunities here in failure cases.
5259 		 */
5260 
5261 		(void) memset(&sel, 0, sizeof (sel));
5262 		sel.ips_isv4 = io->ipsec_out_v4;
5263 		if (tunnel_mode) {
5264 			sel.ips_protocol = (io->ipsec_out_inaf == AF_INET) ?
5265 			    IPPROTO_ENCAP : IPPROTO_IPV6;
5266 		} else {
5267 			sel.ips_protocol = io->ipsec_out_proto;
5268 			sel.ips_local_port = io->ipsec_out_src_port;
5269 			sel.ips_remote_port = io->ipsec_out_dst_port;
5270 		}
5271 		sel.ips_icmp_type = io->ipsec_out_icmp_type;
5272 		sel.ips_icmp_code = io->ipsec_out_icmp_code;
5273 		sel.ips_is_icmp_inv_acq = 0;
5274 		if (af == AF_INET) {
5275 			sel.ips_local_addr_v4 = ipha->ipha_src;
5276 			sel.ips_remote_addr_v4 = ipha->ipha_dst;
5277 		} else {
5278 			sel.ips_local_addr_v6 = ip6h->ip6_src;
5279 			sel.ips_remote_addr_v6 = ip6h->ip6_dst;
5280 		}
5281 
5282 		extended = sadb_keysock_out(0);
5283 		if (extended != NULL) {
5284 			extended->b_cont = sadb_extended_acquire(&sel, pp, ap,
5285 			    tunnel_mode, seq, 0, ns);
5286 			if (extended->b_cont == NULL) {
5287 				freeb(extended);
5288 				extended = NULL;
5289 			}
5290 		}
5291 	} else
5292 		extended = NULL;
5293 
5294 	/*
5295 	 * Send an ACQUIRE message (and possible an extended ACQUIRE) based on
5296 	 * this new record.  The send-acquire callback assumes that acqrec is
5297 	 * already locked.
5298 	 */
5299 	(*spp->s_acqfn)(newbie, extended, ns);
5300 }
5301 
5302 /*
5303  * Unlink and free an acquire record.
5304  */
5305 void
5306 sadb_destroy_acquire(ipsacq_t *acqrec, netstack_t *ns)
5307 {
5308 	mblk_t *mp;
5309 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
5310 
5311 	ASSERT(MUTEX_HELD(acqrec->ipsacq_linklock));
5312 
5313 	if (acqrec->ipsacq_policy != NULL) {
5314 		IPPOL_REFRELE(acqrec->ipsacq_policy, ns);
5315 	}
5316 	if (acqrec->ipsacq_act != NULL) {
5317 		IPACT_REFRELE(acqrec->ipsacq_act);
5318 	}
5319 
5320 	/* Unlink */
5321 	*(acqrec->ipsacq_ptpn) = acqrec->ipsacq_next;
5322 	if (acqrec->ipsacq_next != NULL)
5323 		acqrec->ipsacq_next->ipsacq_ptpn = acqrec->ipsacq_ptpn;
5324 
5325 	/*
5326 	 * Free hanging mp's.
5327 	 *
5328 	 * XXX Instead of freemsg(), perhaps use IPSEC_REQ_FAILED.
5329 	 */
5330 
5331 	mutex_enter(&acqrec->ipsacq_lock);
5332 	while (acqrec->ipsacq_mp != NULL) {
5333 		mp = acqrec->ipsacq_mp;
5334 		acqrec->ipsacq_mp = mp->b_next;
5335 		mp->b_next = NULL;
5336 		ip_drop_packet(mp, B_FALSE, NULL, NULL,
5337 		    DROPPER(ipss, ipds_sadb_acquire_timeout),
5338 		    &ipss->ipsec_sadb_dropper);
5339 	}
5340 	mutex_exit(&acqrec->ipsacq_lock);
5341 
5342 	/* Free */
5343 	mutex_destroy(&acqrec->ipsacq_lock);
5344 	kmem_free(acqrec, sizeof (*acqrec));
5345 }
5346 
5347 /*
5348  * Destroy an acquire list fanout.
5349  */
5350 static void
5351 sadb_destroy_acqlist(iacqf_t **listp, uint_t numentries, boolean_t forever,
5352     netstack_t *ns)
5353 {
5354 	int i;
5355 	iacqf_t *list = *listp;
5356 
5357 	if (list == NULL)
5358 		return;
5359 
5360 	for (i = 0; i < numentries; i++) {
5361 		mutex_enter(&(list[i].iacqf_lock));
5362 		while (list[i].iacqf_ipsacq != NULL)
5363 			sadb_destroy_acquire(list[i].iacqf_ipsacq, ns);
5364 		mutex_exit(&(list[i].iacqf_lock));
5365 		if (forever)
5366 			mutex_destroy(&(list[i].iacqf_lock));
5367 	}
5368 
5369 	if (forever) {
5370 		*listp = NULL;
5371 		kmem_free(list, numentries * sizeof (*list));
5372 	}
5373 }
5374 
5375 /*
5376  * Create an algorithm descriptor for an extended ACQUIRE.  Filter crypto
5377  * framework's view of reality vs. IPsec's.  EF's wins, BTW.
5378  */
5379 static uint8_t *
5380 sadb_new_algdesc(uint8_t *start, uint8_t *limit,
5381     sadb_x_ecomb_t *ecomb, uint8_t satype, uint8_t algtype,
5382     uint8_t alg, uint16_t minbits, uint16_t maxbits, ipsec_stack_t *ipss)
5383 {
5384 	uint8_t *cur = start;
5385 	ipsec_alginfo_t *algp;
5386 	sadb_x_algdesc_t *algdesc = (sadb_x_algdesc_t *)cur;
5387 
5388 	cur += sizeof (*algdesc);
5389 	if (cur >= limit)
5390 		return (NULL);
5391 
5392 	ecomb->sadb_x_ecomb_numalgs++;
5393 
5394 	/*
5395 	 * Normalize vs. crypto framework's limits.  This way, you can specify
5396 	 * a stronger policy, and when the framework loads a stronger version,
5397 	 * you can just keep plowing w/o rewhacking your SPD.
5398 	 */
5399 	mutex_enter(&ipss->ipsec_alg_lock);
5400 	algp = ipss->ipsec_alglists[(algtype == SADB_X_ALGTYPE_AUTH) ?
5401 	    IPSEC_ALG_AUTH : IPSEC_ALG_ENCR][alg];
5402 	if (algp == NULL) {
5403 		mutex_exit(&ipss->ipsec_alg_lock);
5404 		return (NULL);	/* Algorithm doesn't exist.  Fail gracefully. */
5405 	}
5406 	if (minbits < algp->alg_ef_minbits)
5407 		minbits = algp->alg_ef_minbits;
5408 	if (maxbits > algp->alg_ef_maxbits)
5409 		maxbits = algp->alg_ef_maxbits;
5410 	mutex_exit(&ipss->ipsec_alg_lock);
5411 
5412 	algdesc->sadb_x_algdesc_satype = satype;
5413 	algdesc->sadb_x_algdesc_algtype = algtype;
5414 	algdesc->sadb_x_algdesc_alg = alg;
5415 	algdesc->sadb_x_algdesc_minbits = minbits;
5416 	algdesc->sadb_x_algdesc_maxbits = maxbits;
5417 	algdesc->sadb_x_algdesc_reserved = 0;
5418 	return (cur);
5419 }
5420 
5421 /*
5422  * Convert the given ipsec_action_t into an ecomb starting at *ecomb
5423  * which must fit before *limit
5424  *
5425  * return NULL if we ran out of room or a pointer to the end of the ecomb.
5426  */
5427 static uint8_t *
5428 sadb_action_to_ecomb(uint8_t *start, uint8_t *limit, ipsec_action_t *act,
5429     netstack_t *ns)
5430 {
5431 	uint8_t *cur = start;
5432 	sadb_x_ecomb_t *ecomb = (sadb_x_ecomb_t *)cur;
5433 	ipsec_prot_t *ipp;
5434 	ipsec_stack_t *ipss = ns->netstack_ipsec;
5435 
5436 	cur += sizeof (*ecomb);
5437 	if (cur >= limit)
5438 		return (NULL);
5439 
5440 	ASSERT(act->ipa_act.ipa_type == IPSEC_ACT_APPLY);
5441 
5442 	ipp = &act->ipa_act.ipa_apply;
5443 
5444 	ecomb->sadb_x_ecomb_numalgs = 0;
5445 	ecomb->sadb_x_ecomb_reserved = 0;
5446 	ecomb->sadb_x_ecomb_reserved2 = 0;
5447 	/*
5448 	 * No limits on allocations, since we really don't support that
5449 	 * concept currently.
5450 	 */
5451 	ecomb->sadb_x_ecomb_soft_allocations = 0;
5452 	ecomb->sadb_x_ecomb_hard_allocations = 0;
5453 
5454 	/*
5455 	 * XXX TBD: Policy or global parameters will eventually be
5456 	 * able to fill in some of these.
5457 	 */
5458 	ecomb->sadb_x_ecomb_flags = 0;
5459 	ecomb->sadb_x_ecomb_soft_bytes = 0;
5460 	ecomb->sadb_x_ecomb_hard_bytes = 0;
5461 	ecomb->sadb_x_ecomb_soft_addtime = 0;
5462 	ecomb->sadb_x_ecomb_hard_addtime = 0;
5463 	ecomb->sadb_x_ecomb_soft_usetime = 0;
5464 	ecomb->sadb_x_ecomb_hard_usetime = 0;
5465 
5466 	if (ipp->ipp_use_ah) {
5467 		cur = sadb_new_algdesc(cur, limit, ecomb,
5468 		    SADB_SATYPE_AH, SADB_X_ALGTYPE_AUTH, ipp->ipp_auth_alg,
5469 		    ipp->ipp_ah_minbits, ipp->ipp_ah_maxbits, ipss);
5470 		if (cur == NULL)
5471 			return (NULL);
5472 		ipsecah_fill_defs(ecomb, ns);
5473 	}
5474 
5475 	if (ipp->ipp_use_esp) {
5476 		if (ipp->ipp_use_espa) {
5477 			cur = sadb_new_algdesc(cur, limit, ecomb,
5478 			    SADB_SATYPE_ESP, SADB_X_ALGTYPE_AUTH,
5479 			    ipp->ipp_esp_auth_alg,
5480 			    ipp->ipp_espa_minbits,
5481 			    ipp->ipp_espa_maxbits, ipss);
5482 			if (cur == NULL)
5483 				return (NULL);
5484 		}
5485 
5486 		cur = sadb_new_algdesc(cur, limit, ecomb,
5487 		    SADB_SATYPE_ESP, SADB_X_ALGTYPE_CRYPT,
5488 		    ipp->ipp_encr_alg,
5489 		    ipp->ipp_espe_minbits,
5490 		    ipp->ipp_espe_maxbits, ipss);
5491 		if (cur == NULL)
5492 			return (NULL);
5493 		/* Fill in lifetimes if and only if AH didn't already... */
5494 		if (!ipp->ipp_use_ah)
5495 			ipsecesp_fill_defs(ecomb, ns);
5496 	}
5497 
5498 	return (cur);
5499 }
5500 
5501 /*
5502  * Construct an extended ACQUIRE message based on a selector and the resulting
5503  * IPsec action.
5504  *
5505  * NOTE: This is used by both inverse ACQUIRE and actual ACQUIRE
5506  * generation. As a consequence, expect this function to evolve
5507  * rapidly.
5508  */
5509 static mblk_t *
5510 sadb_extended_acquire(ipsec_selector_t *sel, ipsec_policy_t *pol,
5511     ipsec_action_t *act, boolean_t tunnel_mode, uint32_t seq, uint32_t pid,
5512     netstack_t *ns)
5513 {
5514 	mblk_t *mp;
5515 	sadb_msg_t *samsg;
5516 	uint8_t *start, *cur, *end;
5517 	uint32_t *saddrptr, *daddrptr;
5518 	sa_family_t af;
5519 	sadb_prop_t *eprop;
5520 	ipsec_action_t *ap, *an;
5521 	ipsec_selkey_t *ipsl;
5522 	uint8_t proto, pfxlen;
5523 	uint16_t lport, rport;
5524 	uint32_t kmp, kmc;
5525 
5526 	/*
5527 	 * Find the action we want sooner rather than later..
5528 	 */
5529 	an = NULL;
5530 	if (pol == NULL) {
5531 		ap = act;
5532 	} else {
5533 		ap = pol->ipsp_act;
5534 
5535 		if (ap != NULL)
5536 			an = ap->ipa_next;
5537 	}
5538 
5539 	/*
5540 	 * Just take a swag for the allocation for now.	 We can always
5541 	 * alter it later.
5542 	 */
5543 #define	SADB_EXTENDED_ACQUIRE_SIZE	4096
5544 	mp = allocb(SADB_EXTENDED_ACQUIRE_SIZE, BPRI_HI);
5545 	if (mp == NULL)
5546 		return (NULL);
5547 
5548 	start = mp->b_rptr;
5549 	end = start + SADB_EXTENDED_ACQUIRE_SIZE;
5550 
5551 	cur = start;
5552 
5553 	samsg = (sadb_msg_t *)cur;
5554 	cur += sizeof (*samsg);
5555 
5556 	samsg->sadb_msg_version = PF_KEY_V2;
5557 	samsg->sadb_msg_type = SADB_ACQUIRE;
5558 	samsg->sadb_msg_errno = 0;
5559 	samsg->sadb_msg_reserved = 0;
5560 	samsg->sadb_msg_satype = 0;
5561 	samsg->sadb_msg_seq = seq;
5562 	samsg->sadb_msg_pid = pid;
5563 
5564 	if (tunnel_mode) {
5565 		/*
5566 		 * Form inner address extensions based NOT on the inner
5567 		 * selectors (i.e. the packet data), but on the policy's
5568 		 * selector key (i.e. the policy's selector information).
5569 		 *
5570 		 * NOTE:  The position of IPv4 and IPv6 addresses is the
5571 		 * same in ipsec_selkey_t (unless the compiler does very
5572 		 * strange things with unions, consult your local C language
5573 		 * lawyer for details).
5574 		 */
5575 		ASSERT(pol != NULL);
5576 
5577 		ipsl = &(pol->ipsp_sel->ipsl_key);
5578 		if (ipsl->ipsl_valid & IPSL_IPV4) {
5579 			af = AF_INET;
5580 			ASSERT(sel->ips_protocol == IPPROTO_ENCAP);
5581 			ASSERT(!(ipsl->ipsl_valid & IPSL_IPV6));
5582 		} else {
5583 			af = AF_INET6;
5584 			ASSERT(sel->ips_protocol == IPPROTO_IPV6);
5585 			ASSERT(ipsl->ipsl_valid & IPSL_IPV6);
5586 		}
5587 
5588 		if (ipsl->ipsl_valid & IPSL_LOCAL_ADDR) {
5589 			saddrptr = (uint32_t *)(&ipsl->ipsl_local);
5590 			pfxlen = ipsl->ipsl_local_pfxlen;
5591 		} else {
5592 			saddrptr = (uint32_t *)(&ipv6_all_zeros);
5593 			pfxlen = 0;
5594 		}
5595 		/* XXX What about ICMP type/code? */
5596 		lport = (ipsl->ipsl_valid & IPSL_LOCAL_PORT) ?
5597 		    ipsl->ipsl_lport : 0;
5598 		proto = (ipsl->ipsl_valid & IPSL_PROTOCOL) ?
5599 		    ipsl->ipsl_proto : 0;
5600 
5601 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_SRC,
5602 		    af, saddrptr, lport, proto, pfxlen);
5603 		if (cur == NULL) {
5604 			freeb(mp);
5605 			return (NULL);
5606 		}
5607 
5608 		if (ipsl->ipsl_valid & IPSL_REMOTE_ADDR) {
5609 			daddrptr = (uint32_t *)(&ipsl->ipsl_remote);
5610 			pfxlen = ipsl->ipsl_remote_pfxlen;
5611 		} else {
5612 			daddrptr = (uint32_t *)(&ipv6_all_zeros);
5613 			pfxlen = 0;
5614 		}
5615 		/* XXX What about ICMP type/code? */
5616 		rport = (ipsl->ipsl_valid & IPSL_REMOTE_PORT) ?
5617 		    ipsl->ipsl_rport : 0;
5618 
5619 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_DST,
5620 		    af, daddrptr, rport, proto, pfxlen);
5621 		if (cur == NULL) {
5622 			freeb(mp);
5623 			return (NULL);
5624 		}
5625 		/*
5626 		 * TODO  - if we go to 3408's dream of transport mode IP-in-IP
5627 		 * _with_ inner-packet address selectors, we'll need to further
5628 		 * distinguish tunnel mode here.  For now, having inner
5629 		 * addresses and/or ports is sufficient.
5630 		 *
5631 		 * Meanwhile, whack proto/ports to reflect IP-in-IP for the
5632 		 * outer addresses.
5633 		 */
5634 		proto = sel->ips_protocol;	/* Either _ENCAP or _IPV6 */
5635 		lport = rport = 0;
5636 	} else if ((ap != NULL) && (!ap->ipa_want_unique)) {
5637 		proto = 0;
5638 		lport = 0;
5639 		rport = 0;
5640 		if (pol != NULL) {
5641 			ipsl = &(pol->ipsp_sel->ipsl_key);
5642 			if (ipsl->ipsl_valid & IPSL_PROTOCOL)
5643 				proto = ipsl->ipsl_proto;
5644 			if (ipsl->ipsl_valid & IPSL_REMOTE_PORT)
5645 				rport = ipsl->ipsl_rport;
5646 			if (ipsl->ipsl_valid & IPSL_LOCAL_PORT)
5647 				lport = ipsl->ipsl_lport;
5648 		}
5649 	} else {
5650 		proto = sel->ips_protocol;
5651 		lport = sel->ips_local_port;
5652 		rport = sel->ips_remote_port;
5653 	}
5654 
5655 	af = sel->ips_isv4 ? AF_INET : AF_INET6;
5656 
5657 	/*
5658 	 * NOTE:  The position of IPv4 and IPv6 addresses is the same in
5659 	 * ipsec_selector_t.
5660 	 */
5661 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_SRC, af,
5662 	    (uint32_t *)(&sel->ips_local_addr_v6), lport, proto, 0);
5663 
5664 	if (cur == NULL) {
5665 		freeb(mp);
5666 		return (NULL);
5667 	}
5668 
5669 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_DST, af,
5670 	    (uint32_t *)(&sel->ips_remote_addr_v6), rport, proto, 0);
5671 
5672 	if (cur == NULL) {
5673 		freeb(mp);
5674 		return (NULL);
5675 	}
5676 
5677 	/*
5678 	 * This section will change a lot as policy evolves.
5679 	 * For now, it'll be relatively simple.
5680 	 */
5681 	eprop = (sadb_prop_t *)cur;
5682 	cur += sizeof (*eprop);
5683 	if (cur > end) {
5684 		/* no space left */
5685 		freeb(mp);
5686 		return (NULL);
5687 	}
5688 
5689 	eprop->sadb_prop_exttype = SADB_X_EXT_EPROP;
5690 	eprop->sadb_x_prop_ereserved = 0;
5691 	eprop->sadb_x_prop_numecombs = 0;
5692 	eprop->sadb_prop_replay = 32;	/* default */
5693 
5694 	kmc = kmp = 0;
5695 
5696 	for (; ap != NULL; ap = an) {
5697 		an = (pol != NULL) ? ap->ipa_next : NULL;
5698 
5699 		/*
5700 		 * Skip non-IPsec policies
5701 		 */
5702 		if (ap->ipa_act.ipa_type != IPSEC_ACT_APPLY)
5703 			continue;
5704 
5705 		if (ap->ipa_act.ipa_apply.ipp_km_proto)
5706 			kmp = ap->ipa_act.ipa_apply.ipp_km_proto;
5707 		if (ap->ipa_act.ipa_apply.ipp_km_cookie)
5708 			kmc = ap->ipa_act.ipa_apply.ipp_km_cookie;
5709 		if (ap->ipa_act.ipa_apply.ipp_replay_depth) {
5710 			eprop->sadb_prop_replay =
5711 			    ap->ipa_act.ipa_apply.ipp_replay_depth;
5712 		}
5713 
5714 		cur = sadb_action_to_ecomb(cur, end, ap, ns);
5715 		if (cur == NULL) { /* no space */
5716 			freeb(mp);
5717 			return (NULL);
5718 		}
5719 		eprop->sadb_x_prop_numecombs++;
5720 	}
5721 
5722 	if (eprop->sadb_x_prop_numecombs == 0) {
5723 		/*
5724 		 * This will happen if we fail to find a policy
5725 		 * allowing for IPsec processing.
5726 		 * Construct an error message.
5727 		 */
5728 		samsg->sadb_msg_len = SADB_8TO64(sizeof (*samsg));
5729 		samsg->sadb_msg_errno = ENOENT;
5730 		samsg->sadb_x_msg_diagnostic = 0;
5731 		return (mp);
5732 	}
5733 
5734 	if ((kmp != 0) || (kmc != 0)) {
5735 		cur = sadb_make_kmc_ext(cur, end, kmp, kmc);
5736 		if (cur == NULL) {
5737 			freeb(mp);
5738 			return (NULL);
5739 		}
5740 	}
5741 
5742 	eprop->sadb_prop_len = SADB_8TO64(cur - (uint8_t *)eprop);
5743 	samsg->sadb_msg_len = SADB_8TO64(cur - start);
5744 	mp->b_wptr = cur;
5745 
5746 	return (mp);
5747 }
5748 
5749 /*
5750  * Generic setup of an RFC 2367 ACQUIRE message.  Caller sets satype.
5751  *
5752  * NOTE: This function acquires alg_lock as a side-effect if-and-only-if we
5753  * succeed (i.e. return non-NULL).  Caller MUST release it.  This is to
5754  * maximize code consolidation while preventing algorithm changes from messing
5755  * with the callers finishing touches on the ACQUIRE itself.
5756  */
5757 mblk_t *
5758 sadb_setup_acquire(ipsacq_t *acqrec, uint8_t satype, ipsec_stack_t *ipss)
5759 {
5760 	uint_t allocsize;
5761 	mblk_t *pfkeymp, *msgmp;
5762 	sa_family_t af;
5763 	uint8_t *cur, *end;
5764 	sadb_msg_t *samsg;
5765 	uint16_t sport_typecode;
5766 	uint16_t dport_typecode;
5767 	uint8_t check_proto;
5768 	boolean_t tunnel_mode = (acqrec->ipsacq_inneraddrfam != 0);
5769 
5770 	ASSERT(MUTEX_HELD(&acqrec->ipsacq_lock));
5771 
5772 	pfkeymp = sadb_keysock_out(0);
5773 	if (pfkeymp == NULL)
5774 		return (NULL);
5775 
5776 	/*
5777 	 * First, allocate a basic ACQUIRE message
5778 	 */
5779 	allocsize = sizeof (sadb_msg_t) + sizeof (sadb_address_t) +
5780 	    sizeof (sadb_address_t) + sizeof (sadb_prop_t);
5781 
5782 	/* Make sure there's enough to cover both AF_INET and AF_INET6. */
5783 	allocsize += 2 * sizeof (struct sockaddr_in6);
5784 
5785 	mutex_enter(&ipss->ipsec_alg_lock);
5786 	/* NOTE:  The lock is now held through to this function's return. */
5787 	allocsize += ipss->ipsec_nalgs[IPSEC_ALG_AUTH] *
5788 	    ipss->ipsec_nalgs[IPSEC_ALG_ENCR] * sizeof (sadb_comb_t);
5789 
5790 	if (tunnel_mode) {
5791 		/* Tunnel mode! */
5792 		allocsize += 2 * sizeof (sadb_address_t);
5793 		/* Enough to cover both AF_INET and AF_INET6. */
5794 		allocsize += 2 * sizeof (struct sockaddr_in6);
5795 	}
5796 
5797 	msgmp = allocb(allocsize, BPRI_HI);
5798 	if (msgmp == NULL) {
5799 		freeb(pfkeymp);
5800 		mutex_exit(&ipss->ipsec_alg_lock);
5801 		return (NULL);
5802 	}
5803 
5804 	pfkeymp->b_cont = msgmp;
5805 	cur = msgmp->b_rptr;
5806 	end = cur + allocsize;
5807 	samsg = (sadb_msg_t *)cur;
5808 	cur += sizeof (sadb_msg_t);
5809 
5810 	af = acqrec->ipsacq_addrfam;
5811 	switch (af) {
5812 	case AF_INET:
5813 		check_proto = IPPROTO_ICMP;
5814 		break;
5815 	case AF_INET6:
5816 		check_proto = IPPROTO_ICMPV6;
5817 		break;
5818 	default:
5819 		/* This should never happen unless we have kernel bugs. */
5820 		cmn_err(CE_WARN,
5821 		    "sadb_setup_acquire:  corrupt ACQUIRE record.\n");
5822 		ASSERT(0);
5823 		mutex_exit(&ipss->ipsec_alg_lock);
5824 		return (NULL);
5825 	}
5826 
5827 	samsg->sadb_msg_version = PF_KEY_V2;
5828 	samsg->sadb_msg_type = SADB_ACQUIRE;
5829 	samsg->sadb_msg_satype = satype;
5830 	samsg->sadb_msg_errno = 0;
5831 	samsg->sadb_msg_pid = 0;
5832 	samsg->sadb_msg_reserved = 0;
5833 	samsg->sadb_msg_seq = acqrec->ipsacq_seq;
5834 
5835 	ASSERT(MUTEX_HELD(&acqrec->ipsacq_lock));
5836 
5837 	if ((acqrec->ipsacq_proto == check_proto) || tunnel_mode) {
5838 		sport_typecode = dport_typecode = 0;
5839 	} else {
5840 		sport_typecode = acqrec->ipsacq_srcport;
5841 		dport_typecode = acqrec->ipsacq_dstport;
5842 	}
5843 
5844 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_SRC, af,
5845 	    acqrec->ipsacq_srcaddr, sport_typecode, acqrec->ipsacq_proto, 0);
5846 
5847 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_DST, af,
5848 	    acqrec->ipsacq_dstaddr, dport_typecode, acqrec->ipsacq_proto, 0);
5849 
5850 	if (tunnel_mode) {
5851 		sport_typecode = acqrec->ipsacq_srcport;
5852 		dport_typecode = acqrec->ipsacq_dstport;
5853 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_SRC,
5854 		    acqrec->ipsacq_inneraddrfam, acqrec->ipsacq_innersrc,
5855 		    sport_typecode, acqrec->ipsacq_inner_proto,
5856 		    acqrec->ipsacq_innersrcpfx);
5857 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_DST,
5858 		    acqrec->ipsacq_inneraddrfam, acqrec->ipsacq_innerdst,
5859 		    dport_typecode, acqrec->ipsacq_inner_proto,
5860 		    acqrec->ipsacq_innerdstpfx);
5861 	}
5862 
5863 	/* XXX Insert identity information here. */
5864 
5865 	/* XXXMLS Insert sensitivity information here. */
5866 
5867 	if (cur != NULL)
5868 		samsg->sadb_msg_len = SADB_8TO64(cur - msgmp->b_rptr);
5869 	else
5870 		mutex_exit(&ipss->ipsec_alg_lock);
5871 
5872 	return (pfkeymp);
5873 }
5874 
5875 /*
5876  * Given an SADB_GETSPI message, find an appropriately ranged SA and
5877  * allocate an SA.  If there are message improprieties, return (ipsa_t *)-1.
5878  * If there was a memory allocation error, return NULL.	 (Assume NULL !=
5879  * (ipsa_t *)-1).
5880  *
5881  * master_spi is passed in host order.
5882  */
5883 ipsa_t *
5884 sadb_getspi(keysock_in_t *ksi, uint32_t master_spi, int *diagnostic,
5885     netstack_t *ns, uint_t sa_type)
5886 {
5887 	sadb_address_t *src =
5888 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC],
5889 	    *dst = (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
5890 	sadb_spirange_t *range =
5891 	    (sadb_spirange_t *)ksi->ks_in_extv[SADB_EXT_SPIRANGE];
5892 	struct sockaddr_in *ssa, *dsa;
5893 	struct sockaddr_in6 *ssa6, *dsa6;
5894 	uint32_t *srcaddr, *dstaddr;
5895 	sa_family_t af;
5896 	uint32_t add, min, max;
5897 	uint8_t protocol =
5898 	    (sa_type == SADB_SATYPE_AH) ? IPPROTO_AH : IPPROTO_ESP;
5899 
5900 	if (src == NULL) {
5901 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
5902 		return ((ipsa_t *)-1);
5903 	}
5904 	if (dst == NULL) {
5905 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
5906 		return ((ipsa_t *)-1);
5907 	}
5908 	if (range == NULL) {
5909 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_RANGE;
5910 		return ((ipsa_t *)-1);
5911 	}
5912 
5913 	min = ntohl(range->sadb_spirange_min);
5914 	max = ntohl(range->sadb_spirange_max);
5915 	dsa = (struct sockaddr_in *)(dst + 1);
5916 	dsa6 = (struct sockaddr_in6 *)dsa;
5917 
5918 	ssa = (struct sockaddr_in *)(src + 1);
5919 	ssa6 = (struct sockaddr_in6 *)ssa;
5920 	ASSERT(dsa->sin_family == ssa->sin_family);
5921 
5922 	srcaddr = ALL_ZEROES_PTR;
5923 	af = dsa->sin_family;
5924 	switch (af) {
5925 	case AF_INET:
5926 		if (src != NULL)
5927 			srcaddr = (uint32_t *)(&ssa->sin_addr);
5928 		dstaddr = (uint32_t *)(&dsa->sin_addr);
5929 		break;
5930 	case AF_INET6:
5931 		if (src != NULL)
5932 			srcaddr = (uint32_t *)(&ssa6->sin6_addr);
5933 		dstaddr = (uint32_t *)(&dsa6->sin6_addr);
5934 		break;
5935 	default:
5936 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_DST_AF;
5937 		return ((ipsa_t *)-1);
5938 	}
5939 
5940 	if (master_spi < min || master_spi > max) {
5941 		/* Return a random value in the range. */
5942 		if (cl_inet_getspi) {
5943 			cl_inet_getspi(ns->netstack_stackid, protocol,
5944 			    (uint8_t *)&add, sizeof (add), NULL);
5945 		} else {
5946 			(void) random_get_pseudo_bytes((uint8_t *)&add,
5947 			    sizeof (add));
5948 		}
5949 		master_spi = min + (add % (max - min + 1));
5950 	}
5951 
5952 	/*
5953 	 * Since master_spi is passed in host order, we need to htonl() it
5954 	 * for the purposes of creating a new SA.
5955 	 */
5956 	return (sadb_makelarvalassoc(htonl(master_spi), srcaddr, dstaddr, af,
5957 	    ns));
5958 }
5959 
5960 /*
5961  *
5962  * Locate an ACQUIRE and nuke it.  If I have an samsg that's larger than the
5963  * base header, just ignore it.	 Otherwise, lock down the whole ACQUIRE list
5964  * and scan for the sequence number in question.  I may wish to accept an
5965  * address pair with it, for easier searching.
5966  *
5967  * Caller frees the message, so we don't have to here.
5968  *
5969  * NOTE:	The ip_q parameter may be used in the future for ACQUIRE
5970  *		failures.
5971  */
5972 /* ARGSUSED */
5973 void
5974 sadb_in_acquire(sadb_msg_t *samsg, sadbp_t *sp, queue_t *ip_q, netstack_t *ns)
5975 {
5976 	int i;
5977 	ipsacq_t *acqrec;
5978 	iacqf_t *bucket;
5979 
5980 	/*
5981 	 * I only accept the base header for this!
5982 	 * Though to be honest, requiring the dst address would help
5983 	 * immensely.
5984 	 *
5985 	 * XXX	There are already cases where I can get the dst address.
5986 	 */
5987 	if (samsg->sadb_msg_len > SADB_8TO64(sizeof (*samsg)))
5988 		return;
5989 
5990 	/*
5991 	 * Using the samsg->sadb_msg_seq, find the ACQUIRE record, delete it,
5992 	 * (and in the future send a message to IP with the appropriate error
5993 	 * number).
5994 	 *
5995 	 * Q: Do I want to reject if pid != 0?
5996 	 */
5997 
5998 	for (i = 0; i < sp->s_v4.sdb_hashsize; i++) {
5999 		bucket = &sp->s_v4.sdb_acq[i];
6000 		mutex_enter(&bucket->iacqf_lock);
6001 		for (acqrec = bucket->iacqf_ipsacq; acqrec != NULL;
6002 		    acqrec = acqrec->ipsacq_next) {
6003 			if (samsg->sadb_msg_seq == acqrec->ipsacq_seq)
6004 				break;	/* for acqrec... loop. */
6005 		}
6006 		if (acqrec != NULL)
6007 			break;	/* for i = 0... loop. */
6008 
6009 		mutex_exit(&bucket->iacqf_lock);
6010 	}
6011 
6012 	if (acqrec == NULL) {
6013 		for (i = 0; i < sp->s_v6.sdb_hashsize; i++) {
6014 			bucket = &sp->s_v6.sdb_acq[i];
6015 			mutex_enter(&bucket->iacqf_lock);
6016 			for (acqrec = bucket->iacqf_ipsacq; acqrec != NULL;
6017 			    acqrec = acqrec->ipsacq_next) {
6018 				if (samsg->sadb_msg_seq == acqrec->ipsacq_seq)
6019 					break;	/* for acqrec... loop. */
6020 			}
6021 			if (acqrec != NULL)
6022 				break;	/* for i = 0... loop. */
6023 
6024 			mutex_exit(&bucket->iacqf_lock);
6025 		}
6026 	}
6027 
6028 
6029 	if (acqrec == NULL)
6030 		return;
6031 
6032 	/*
6033 	 * What do I do with the errno and IP?	I may need mp's services a
6034 	 * little more.	 See sadb_destroy_acquire() for future directions
6035 	 * beyond free the mblk chain on the acquire record.
6036 	 */
6037 
6038 	ASSERT(&bucket->iacqf_lock == acqrec->ipsacq_linklock);
6039 	sadb_destroy_acquire(acqrec, ns);
6040 	/* Have to exit mutex here, because of breaking out of for loop. */
6041 	mutex_exit(&bucket->iacqf_lock);
6042 }
6043 
6044 /*
6045  * The following functions work with the replay windows of an SA.  They assume
6046  * the ipsa->ipsa_replay_arr is an array of uint64_t, and that the bit vector
6047  * represents the highest sequence number packet received, and back
6048  * (ipsa->ipsa_replay_wsize) packets.
6049  */
6050 
6051 /*
6052  * Is the replay bit set?
6053  */
6054 static boolean_t
6055 ipsa_is_replay_set(ipsa_t *ipsa, uint32_t offset)
6056 {
6057 	uint64_t bit = (uint64_t)1 << (uint64_t)(offset & 63);
6058 
6059 	return ((bit & ipsa->ipsa_replay_arr[offset >> 6]) ? B_TRUE : B_FALSE);
6060 }
6061 
6062 /*
6063  * Shift the bits of the replay window over.
6064  */
6065 static void
6066 ipsa_shift_replay(ipsa_t *ipsa, uint32_t shift)
6067 {
6068 	int i;
6069 	int jump = ((shift - 1) >> 6) + 1;
6070 
6071 	if (shift == 0)
6072 		return;
6073 
6074 	for (i = (ipsa->ipsa_replay_wsize - 1) >> 6; i >= 0; i--) {
6075 		if (i + jump <= (ipsa->ipsa_replay_wsize - 1) >> 6) {
6076 			ipsa->ipsa_replay_arr[i + jump] |=
6077 			    ipsa->ipsa_replay_arr[i] >> (64 - (shift & 63));
6078 		}
6079 		ipsa->ipsa_replay_arr[i] <<= shift;
6080 	}
6081 }
6082 
6083 /*
6084  * Set a bit in the bit vector.
6085  */
6086 static void
6087 ipsa_set_replay(ipsa_t *ipsa, uint32_t offset)
6088 {
6089 	uint64_t bit = (uint64_t)1 << (uint64_t)(offset & 63);
6090 
6091 	ipsa->ipsa_replay_arr[offset >> 6] |= bit;
6092 }
6093 
6094 #define	SADB_MAX_REPLAY_VALUE 0xffffffff
6095 
6096 /*
6097  * Assume caller has NOT done ntohl() already on seq.  Check to see
6098  * if replay sequence number "seq" has been seen already.
6099  */
6100 boolean_t
6101 sadb_replay_check(ipsa_t *ipsa, uint32_t seq)
6102 {
6103 	boolean_t rc;
6104 	uint32_t diff;
6105 
6106 	if (ipsa->ipsa_replay_wsize == 0)
6107 		return (B_TRUE);
6108 
6109 	/*
6110 	 * NOTE:  I've already checked for 0 on the wire in sadb_replay_peek().
6111 	 */
6112 
6113 	/* Convert sequence number into host order before holding the mutex. */
6114 	seq = ntohl(seq);
6115 
6116 	mutex_enter(&ipsa->ipsa_lock);
6117 
6118 	/* Initialize inbound SA's ipsa_replay field to last one received. */
6119 	if (ipsa->ipsa_replay == 0)
6120 		ipsa->ipsa_replay = 1;
6121 
6122 	if (seq > ipsa->ipsa_replay) {
6123 		/*
6124 		 * I have received a new "highest value received".  Shift
6125 		 * the replay window over.
6126 		 */
6127 		diff = seq - ipsa->ipsa_replay;
6128 		if (diff < ipsa->ipsa_replay_wsize) {
6129 			/* In replay window, shift bits over. */
6130 			ipsa_shift_replay(ipsa, diff);
6131 		} else {
6132 			/* WAY FAR AHEAD, clear bits and start again. */
6133 			bzero(ipsa->ipsa_replay_arr,
6134 			    sizeof (ipsa->ipsa_replay_arr));
6135 		}
6136 		ipsa_set_replay(ipsa, 0);
6137 		ipsa->ipsa_replay = seq;
6138 		rc = B_TRUE;
6139 		goto done;
6140 	}
6141 	diff = ipsa->ipsa_replay - seq;
6142 	if (diff >= ipsa->ipsa_replay_wsize || ipsa_is_replay_set(ipsa, diff)) {
6143 		rc = B_FALSE;
6144 		goto done;
6145 	}
6146 	/* Set this packet as seen. */
6147 	ipsa_set_replay(ipsa, diff);
6148 
6149 	rc = B_TRUE;
6150 done:
6151 	mutex_exit(&ipsa->ipsa_lock);
6152 	return (rc);
6153 }
6154 
6155 /*
6156  * "Peek" and see if we should even bother going through the effort of
6157  * running an authentication check on the sequence number passed in.
6158  * this takes into account packets that are below the replay window,
6159  * and collisions with already replayed packets.  Return B_TRUE if it
6160  * is okay to proceed, B_FALSE if this packet should be dropped immediately.
6161  * Assume same byte-ordering as sadb_replay_check.
6162  */
6163 boolean_t
6164 sadb_replay_peek(ipsa_t *ipsa, uint32_t seq)
6165 {
6166 	boolean_t rc = B_FALSE;
6167 	uint32_t diff;
6168 
6169 	if (ipsa->ipsa_replay_wsize == 0)
6170 		return (B_TRUE);
6171 
6172 	/*
6173 	 * 0 is 0, regardless of byte order... :)
6174 	 *
6175 	 * If I get 0 on the wire (and there is a replay window) then the
6176 	 * sender most likely wrapped.	This ipsa may need to be marked or
6177 	 * something.
6178 	 */
6179 	if (seq == 0)
6180 		return (B_FALSE);
6181 
6182 	seq = ntohl(seq);
6183 	mutex_enter(&ipsa->ipsa_lock);
6184 	if (seq < ipsa->ipsa_replay - ipsa->ipsa_replay_wsize &&
6185 	    ipsa->ipsa_replay >= ipsa->ipsa_replay_wsize)
6186 		goto done;
6187 
6188 	/*
6189 	 * If I've hit 0xffffffff, then quite honestly, I don't need to
6190 	 * bother with formalities.  I'm not accepting any more packets
6191 	 * on this SA.
6192 	 */
6193 	if (ipsa->ipsa_replay == SADB_MAX_REPLAY_VALUE) {
6194 		/*
6195 		 * Since we're already holding the lock, update the
6196 		 * expire time ala. sadb_replay_delete() and return.
6197 		 */
6198 		ipsa->ipsa_hardexpiretime = (time_t)1;
6199 		goto done;
6200 	}
6201 
6202 	if (seq <= ipsa->ipsa_replay) {
6203 		/*
6204 		 * This seq is in the replay window.  I'm not below it,
6205 		 * because I already checked for that above!
6206 		 */
6207 		diff = ipsa->ipsa_replay - seq;
6208 		if (ipsa_is_replay_set(ipsa, diff))
6209 			goto done;
6210 	}
6211 	/* Else return B_TRUE, I'm going to advance the window. */
6212 
6213 	rc = B_TRUE;
6214 done:
6215 	mutex_exit(&ipsa->ipsa_lock);
6216 	return (rc);
6217 }
6218 
6219 /*
6220  * Delete a single SA.
6221  *
6222  * For now, use the quick-and-dirty trick of making the association's
6223  * hard-expire lifetime (time_t)1, ensuring deletion by the *_ager().
6224  */
6225 void
6226 sadb_replay_delete(ipsa_t *assoc)
6227 {
6228 	mutex_enter(&assoc->ipsa_lock);
6229 	assoc->ipsa_hardexpiretime = (time_t)1;
6230 	mutex_exit(&assoc->ipsa_lock);
6231 }
6232 
6233 /*
6234  * Given a queue that presumably points to IP, send a T_BIND_REQ for _proto_
6235  * down.  The caller will handle the T_BIND_ACK locally.
6236  */
6237 boolean_t
6238 sadb_t_bind_req(queue_t *q, int proto)
6239 {
6240 	struct T_bind_req *tbr;
6241 	mblk_t *mp;
6242 
6243 	mp = allocb_cred(sizeof (struct T_bind_req) + 1, kcred, NOPID);
6244 	if (mp == NULL) {
6245 		/* cmn_err(CE_WARN, */
6246 		/* "sadb_t_bind_req(%d): couldn't allocate mblk\n", proto); */
6247 		return (B_FALSE);
6248 	}
6249 	mp->b_datap->db_type = M_PCPROTO;
6250 	tbr = (struct T_bind_req *)mp->b_rptr;
6251 	mp->b_wptr += sizeof (struct T_bind_req);
6252 	tbr->PRIM_type = T_BIND_REQ;
6253 	tbr->ADDR_length = 0;
6254 	tbr->ADDR_offset = 0;
6255 	tbr->CONIND_number = 0;
6256 	*mp->b_wptr = (uint8_t)proto;
6257 	mp->b_wptr++;
6258 
6259 	putnext(q, mp);
6260 	return (B_TRUE);
6261 }
6262 
6263 /*
6264  * Special front-end to ipsec_rl_strlog() dealing with SA failure.
6265  * this is designed to take only a format string with "* %x * %s *", so
6266  * that "spi" is printed first, then "addr" is converted using inet_pton().
6267  *
6268  * This is abstracted out to save the stack space for only when inet_pton()
6269  * is called.  Make sure "spi" is in network order; it usually is when this
6270  * would get called.
6271  */
6272 void
6273 ipsec_assocfailure(short mid, short sid, char level, ushort_t sl, char *fmt,
6274     uint32_t spi, void *addr, int af, netstack_t *ns)
6275 {
6276 	char buf[INET6_ADDRSTRLEN];
6277 
6278 	ASSERT(af == AF_INET6 || af == AF_INET);
6279 
6280 	ipsec_rl_strlog(ns, mid, sid, level, sl, fmt, ntohl(spi),
6281 	    inet_ntop(af, addr, buf, sizeof (buf)));
6282 }
6283 
6284 /*
6285  * Fills in a reference to the policy, if any, from the conn, in *ppp
6286  * Releases a reference to the passed conn_t.
6287  */
6288 static void
6289 ipsec_conn_pol(ipsec_selector_t *sel, conn_t *connp, ipsec_policy_t **ppp)
6290 {
6291 	ipsec_policy_t	*pp;
6292 	ipsec_latch_t	*ipl = connp->conn_latch;
6293 
6294 	if ((ipl != NULL) && (ipl->ipl_out_policy != NULL)) {
6295 		pp = ipl->ipl_out_policy;
6296 		IPPOL_REFHOLD(pp);
6297 	} else {
6298 		pp = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, NULL, sel,
6299 		    connp->conn_netstack);
6300 	}
6301 	*ppp = pp;
6302 	CONN_DEC_REF(connp);
6303 }
6304 
6305 /*
6306  * The following functions scan through active conn_t structures
6307  * and return a reference to the best-matching policy it can find.
6308  * Caller must release the reference.
6309  */
6310 static void
6311 ipsec_udp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp, ip_stack_t *ipst)
6312 {
6313 	connf_t *connfp;
6314 	conn_t *connp = NULL;
6315 	ipsec_selector_t portonly;
6316 
6317 	bzero((void *)&portonly, sizeof (portonly));
6318 
6319 	if (sel->ips_local_port == 0)
6320 		return;
6321 
6322 	connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(sel->ips_local_port,
6323 	    ipst)];
6324 	mutex_enter(&connfp->connf_lock);
6325 
6326 	if (sel->ips_isv4) {
6327 		connp = connfp->connf_head;
6328 		while (connp != NULL) {
6329 			if (IPCL_UDP_MATCH(connp, sel->ips_local_port,
6330 			    sel->ips_local_addr_v4, sel->ips_remote_port,
6331 			    sel->ips_remote_addr_v4))
6332 				break;
6333 			connp = connp->conn_next;
6334 		}
6335 
6336 		if (connp == NULL) {
6337 			/* Try port-only match in IPv6. */
6338 			portonly.ips_local_port = sel->ips_local_port;
6339 			sel = &portonly;
6340 		}
6341 	}
6342 
6343 	if (connp == NULL) {
6344 		connp = connfp->connf_head;
6345 		while (connp != NULL) {
6346 			if (IPCL_UDP_MATCH_V6(connp, sel->ips_local_port,
6347 			    sel->ips_local_addr_v6, sel->ips_remote_port,
6348 			    sel->ips_remote_addr_v6))
6349 				break;
6350 			connp = connp->conn_next;
6351 		}
6352 
6353 		if (connp == NULL) {
6354 			mutex_exit(&connfp->connf_lock);
6355 			return;
6356 		}
6357 	}
6358 
6359 	CONN_INC_REF(connp);
6360 	mutex_exit(&connfp->connf_lock);
6361 
6362 	ipsec_conn_pol(sel, connp, ppp);
6363 }
6364 
6365 static conn_t *
6366 ipsec_find_listen_conn(uint16_t *pptr, ipsec_selector_t *sel, ip_stack_t *ipst)
6367 {
6368 	connf_t *connfp;
6369 	conn_t *connp = NULL;
6370 	const in6_addr_t *v6addrmatch = &sel->ips_local_addr_v6;
6371 
6372 	if (sel->ips_local_port == 0)
6373 		return (NULL);
6374 
6375 	connfp = &ipst->ips_ipcl_bind_fanout[
6376 	    IPCL_BIND_HASH(sel->ips_local_port, ipst)];
6377 	mutex_enter(&connfp->connf_lock);
6378 
6379 	if (sel->ips_isv4) {
6380 		connp = connfp->connf_head;
6381 		while (connp != NULL) {
6382 			if (IPCL_BIND_MATCH(connp, IPPROTO_TCP,
6383 			    sel->ips_local_addr_v4, pptr[1]))
6384 				break;
6385 			connp = connp->conn_next;
6386 		}
6387 
6388 		if (connp == NULL) {
6389 			/* Match to all-zeroes. */
6390 			v6addrmatch = &ipv6_all_zeros;
6391 		}
6392 	}
6393 
6394 	if (connp == NULL) {
6395 		connp = connfp->connf_head;
6396 		while (connp != NULL) {
6397 			if (IPCL_BIND_MATCH_V6(connp, IPPROTO_TCP,
6398 			    *v6addrmatch, pptr[1]))
6399 				break;
6400 			connp = connp->conn_next;
6401 		}
6402 
6403 		if (connp == NULL) {
6404 			mutex_exit(&connfp->connf_lock);
6405 			return (NULL);
6406 		}
6407 	}
6408 
6409 	CONN_INC_REF(connp);
6410 	mutex_exit(&connfp->connf_lock);
6411 	return (connp);
6412 }
6413 
6414 static void
6415 ipsec_tcp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp, ip_stack_t *ipst)
6416 {
6417 	connf_t 	*connfp;
6418 	conn_t		*connp;
6419 	uint32_t	ports;
6420 	uint16_t	*pptr = (uint16_t *)&ports;
6421 
6422 	/*
6423 	 * Find TCP state in the following order:
6424 	 * 1.) Connected conns.
6425 	 * 2.) Listeners.
6426 	 *
6427 	 * Even though #2 will be the common case for inbound traffic, only
6428 	 * following this order insures correctness.
6429 	 */
6430 
6431 	if (sel->ips_local_port == 0)
6432 		return;
6433 
6434 	/*
6435 	 * 0 should be fport, 1 should be lport.  SRC is the local one here.
6436 	 * See ipsec_construct_inverse_acquire() for details.
6437 	 */
6438 	pptr[0] = sel->ips_remote_port;
6439 	pptr[1] = sel->ips_local_port;
6440 
6441 	connfp = &ipst->ips_ipcl_conn_fanout[
6442 	    IPCL_CONN_HASH(sel->ips_remote_addr_v4, ports, ipst)];
6443 	mutex_enter(&connfp->connf_lock);
6444 	connp = connfp->connf_head;
6445 
6446 	if (sel->ips_isv4) {
6447 		while (connp != NULL) {
6448 			if (IPCL_CONN_MATCH(connp, IPPROTO_TCP,
6449 			    sel->ips_remote_addr_v4, sel->ips_local_addr_v4,
6450 			    ports))
6451 				break;
6452 			connp = connp->conn_next;
6453 		}
6454 	} else {
6455 		while (connp != NULL) {
6456 			if (IPCL_CONN_MATCH_V6(connp, IPPROTO_TCP,
6457 			    sel->ips_remote_addr_v6, sel->ips_local_addr_v6,
6458 			    ports))
6459 				break;
6460 			connp = connp->conn_next;
6461 		}
6462 	}
6463 
6464 	if (connp != NULL) {
6465 		CONN_INC_REF(connp);
6466 		mutex_exit(&connfp->connf_lock);
6467 	} else {
6468 		mutex_exit(&connfp->connf_lock);
6469 
6470 		/* Try the listen hash. */
6471 		if ((connp = ipsec_find_listen_conn(pptr, sel, ipst)) == NULL)
6472 			return;
6473 	}
6474 
6475 	ipsec_conn_pol(sel, connp, ppp);
6476 }
6477 
6478 static void
6479 ipsec_sctp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp,
6480     ip_stack_t *ipst)
6481 {
6482 	conn_t		*connp;
6483 	uint32_t	ports;
6484 	uint16_t	*pptr = (uint16_t *)&ports;
6485 
6486 	/*
6487 	 * Find SCP state in the following order:
6488 	 * 1.) Connected conns.
6489 	 * 2.) Listeners.
6490 	 *
6491 	 * Even though #2 will be the common case for inbound traffic, only
6492 	 * following this order insures correctness.
6493 	 */
6494 
6495 	if (sel->ips_local_port == 0)
6496 		return;
6497 
6498 	/*
6499 	 * 0 should be fport, 1 should be lport.  SRC is the local one here.
6500 	 * See ipsec_construct_inverse_acquire() for details.
6501 	 */
6502 	pptr[0] = sel->ips_remote_port;
6503 	pptr[1] = sel->ips_local_port;
6504 
6505 	if (sel->ips_isv4) {
6506 		in6_addr_t	src, dst;
6507 
6508 		IN6_IPADDR_TO_V4MAPPED(sel->ips_remote_addr_v4, &dst);
6509 		IN6_IPADDR_TO_V4MAPPED(sel->ips_local_addr_v4, &src);
6510 		connp = sctp_find_conn(&dst, &src, ports, ALL_ZONES,
6511 		    ipst->ips_netstack->netstack_sctp);
6512 	} else {
6513 		connp = sctp_find_conn(&sel->ips_remote_addr_v6,
6514 		    &sel->ips_local_addr_v6, ports, ALL_ZONES,
6515 		    ipst->ips_netstack->netstack_sctp);
6516 	}
6517 	if (connp == NULL)
6518 		return;
6519 	ipsec_conn_pol(sel, connp, ppp);
6520 }
6521 
6522 /*
6523  * Fill in a query for the SPD (in "sel") using two PF_KEY address extensions.
6524  * Returns 0 or errno, and always sets *diagnostic to something appropriate
6525  * to PF_KEY.
6526  *
6527  * NOTE:  For right now, this function (and ipsec_selector_t for that matter),
6528  * ignore prefix lengths in the address extension.  Since we match on first-
6529  * entered policies, this shouldn't matter.  Also, since we normalize prefix-
6530  * set addresses to mask out the lower bits, we should get a suitable search
6531  * key for the SPD anyway.  This is the function to change if the assumption
6532  * about suitable search keys is wrong.
6533  */
6534 static int
6535 ipsec_get_inverse_acquire_sel(ipsec_selector_t *sel, sadb_address_t *srcext,
6536     sadb_address_t *dstext, int *diagnostic)
6537 {
6538 	struct sockaddr_in *src, *dst;
6539 	struct sockaddr_in6 *src6, *dst6;
6540 
6541 	*diagnostic = 0;
6542 
6543 	bzero(sel, sizeof (*sel));
6544 	sel->ips_protocol = srcext->sadb_address_proto;
6545 	dst = (struct sockaddr_in *)(dstext + 1);
6546 	if (dst->sin_family == AF_INET6) {
6547 		dst6 = (struct sockaddr_in6 *)dst;
6548 		src6 = (struct sockaddr_in6 *)(srcext + 1);
6549 		if (src6->sin6_family != AF_INET6) {
6550 			*diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
6551 			return (EINVAL);
6552 		}
6553 		sel->ips_remote_addr_v6 = dst6->sin6_addr;
6554 		sel->ips_local_addr_v6 = src6->sin6_addr;
6555 		if (sel->ips_protocol == IPPROTO_ICMPV6) {
6556 			sel->ips_is_icmp_inv_acq = 1;
6557 		} else {
6558 			sel->ips_remote_port = dst6->sin6_port;
6559 			sel->ips_local_port = src6->sin6_port;
6560 		}
6561 		sel->ips_isv4 = B_FALSE;
6562 	} else {
6563 		src = (struct sockaddr_in *)(srcext + 1);
6564 		if (src->sin_family != AF_INET) {
6565 			*diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
6566 			return (EINVAL);
6567 		}
6568 		sel->ips_remote_addr_v4 = dst->sin_addr.s_addr;
6569 		sel->ips_local_addr_v4 = src->sin_addr.s_addr;
6570 		if (sel->ips_protocol == IPPROTO_ICMP) {
6571 			sel->ips_is_icmp_inv_acq = 1;
6572 		} else {
6573 			sel->ips_remote_port = dst->sin_port;
6574 			sel->ips_local_port = src->sin_port;
6575 		}
6576 		sel->ips_isv4 = B_TRUE;
6577 	}
6578 	return (0);
6579 }
6580 
6581 /*
6582  * We have encapsulation.
6583  * - Lookup tun_t by address and look for an associated
6584  *   tunnel policy
6585  * - If there are inner selectors
6586  *   - check ITPF_P_TUNNEL and ITPF_P_ACTIVE
6587  *   - Look up tunnel policy based on selectors
6588  * - Else
6589  *   - Sanity check the negotation
6590  *   - If appropriate, fall through to global policy
6591  */
6592 static int
6593 ipsec_tun_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp,
6594     sadb_address_t *innsrcext, sadb_address_t *inndstext, ipsec_tun_pol_t *itp,
6595     int *diagnostic, netstack_t *ns)
6596 {
6597 	int err;
6598 	ipsec_policy_head_t *polhead;
6599 
6600 	/* Check for inner selectors and act appropriately */
6601 
6602 	if (innsrcext != NULL) {
6603 		/* Inner selectors present */
6604 		ASSERT(inndstext != NULL);
6605 		if ((itp == NULL) ||
6606 		    (itp->itp_flags & (ITPF_P_ACTIVE | ITPF_P_TUNNEL)) !=
6607 		    (ITPF_P_ACTIVE | ITPF_P_TUNNEL)) {
6608 			/*
6609 			 * If inner packet selectors, we must have negotiate
6610 			 * tunnel and active policy.  If the tunnel has
6611 			 * transport-mode policy set on it, or has no policy,
6612 			 * fail.
6613 			 */
6614 			return (ENOENT);
6615 		} else {
6616 			/*
6617 			 * Reset "sel" to indicate inner selectors.  Pass
6618 			 * inner PF_KEY address extensions for this to happen.
6619 			 */
6620 			if ((err = ipsec_get_inverse_acquire_sel(sel,
6621 			    innsrcext, inndstext, diagnostic)) != 0)
6622 				return (err);
6623 			/*
6624 			 * Now look for a tunnel policy based on those inner
6625 			 * selectors.  (Common code is below.)
6626 			 */
6627 		}
6628 	} else {
6629 		/* No inner selectors present */
6630 		if ((itp == NULL) || !(itp->itp_flags & ITPF_P_ACTIVE)) {
6631 			/*
6632 			 * Transport mode negotiation with no tunnel policy
6633 			 * configured - return to indicate a global policy
6634 			 * check is needed.
6635 			 */
6636 			return (0);
6637 		} else if (itp->itp_flags & ITPF_P_TUNNEL) {
6638 			/* Tunnel mode set with no inner selectors. */
6639 			return (ENOENT);
6640 		}
6641 		/*
6642 		 * Else, this is a tunnel policy configured with ifconfig(1m)
6643 		 * or "negotiate transport" with ipsecconf(1m).  We have an
6644 		 * itp with policy set based on any match, so don't bother
6645 		 * changing fields in "sel".
6646 		 */
6647 	}
6648 
6649 	ASSERT(itp != NULL);
6650 	polhead = itp->itp_policy;
6651 	ASSERT(polhead != NULL);
6652 	rw_enter(&polhead->iph_lock, RW_READER);
6653 	*ppp = ipsec_find_policy_head(NULL, polhead,
6654 	    IPSEC_TYPE_INBOUND, sel, ns);
6655 	rw_exit(&polhead->iph_lock);
6656 
6657 	/*
6658 	 * Don't default to global if we didn't find a matching policy entry.
6659 	 * Instead, send ENOENT, just like if we hit a transport-mode tunnel.
6660 	 */
6661 	if (*ppp == NULL)
6662 		return (ENOENT);
6663 
6664 	return (0);
6665 }
6666 
6667 static void
6668 ipsec_oth_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp,
6669     ip_stack_t *ipst)
6670 {
6671 	boolean_t	isv4 = sel->ips_isv4;
6672 	connf_t		*connfp;
6673 	conn_t		*connp;
6674 
6675 	if (isv4) {
6676 		connfp = &ipst->ips_ipcl_proto_fanout[sel->ips_protocol];
6677 	} else {
6678 		connfp = &ipst->ips_ipcl_proto_fanout_v6[sel->ips_protocol];
6679 	}
6680 
6681 	mutex_enter(&connfp->connf_lock);
6682 	for (connp = connfp->connf_head; connp != NULL;
6683 	    connp = connp->conn_next) {
6684 		if (!((isv4 && !((connp->conn_src == 0 ||
6685 		    connp->conn_src == sel->ips_local_addr_v4) &&
6686 		    (connp->conn_rem == 0 ||
6687 		    connp->conn_rem == sel->ips_remote_addr_v4))) ||
6688 		    (!isv4 && !((IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) ||
6689 		    IN6_ARE_ADDR_EQUAL(&connp->conn_srcv6,
6690 		    &sel->ips_local_addr_v6)) &&
6691 		    (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6) ||
6692 		    IN6_ARE_ADDR_EQUAL(&connp->conn_remv6,
6693 		    &sel->ips_remote_addr_v6)))))) {
6694 			break;
6695 		}
6696 	}
6697 	if (connp == NULL) {
6698 		mutex_exit(&connfp->connf_lock);
6699 		return;
6700 	}
6701 
6702 	CONN_INC_REF(connp);
6703 	mutex_exit(&connfp->connf_lock);
6704 
6705 	ipsec_conn_pol(sel, connp, ppp);
6706 }
6707 
6708 /*
6709  * Construct an inverse ACQUIRE reply based on:
6710  *
6711  * 1.) Current global policy.
6712  * 2.) An conn_t match depending on what all was passed in the extv[].
6713  * 3.) A tunnel's policy head.
6714  * ...
6715  * N.) Other stuff TBD (e.g. identities)
6716  *
6717  * If there is an error, set sadb_msg_errno and sadb_x_msg_diagnostic
6718  * in this function so the caller can extract them where appropriately.
6719  *
6720  * The SRC address is the local one - just like an outbound ACQUIRE message.
6721  */
6722 mblk_t *
6723 ipsec_construct_inverse_acquire(sadb_msg_t *samsg, sadb_ext_t *extv[],
6724     netstack_t *ns)
6725 {
6726 	int err;
6727 	int diagnostic;
6728 	sadb_address_t *srcext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_SRC],
6729 	    *dstext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_DST],
6730 	    *innsrcext = (sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_SRC],
6731 	    *inndstext = (sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_DST];
6732 	struct sockaddr_in6 *src, *dst;
6733 	struct sockaddr_in6 *isrc, *idst;
6734 	ipsec_tun_pol_t *itp = NULL;
6735 	ipsec_policy_t *pp = NULL;
6736 	ipsec_selector_t sel, isel;
6737 	mblk_t *retmp;
6738 	ip_stack_t	*ipst = ns->netstack_ip;
6739 
6740 	/* Normalize addresses */
6741 	if (sadb_addrcheck(NULL, (mblk_t *)samsg, (sadb_ext_t *)srcext, 0, ns)
6742 	    == KS_IN_ADDR_UNKNOWN) {
6743 		err = EINVAL;
6744 		diagnostic = SADB_X_DIAGNOSTIC_BAD_SRC;
6745 		goto bail;
6746 	}
6747 	src = (struct sockaddr_in6 *)(srcext + 1);
6748 	if (sadb_addrcheck(NULL, (mblk_t *)samsg, (sadb_ext_t *)dstext, 0, ns)
6749 	    == KS_IN_ADDR_UNKNOWN) {
6750 		err = EINVAL;
6751 		diagnostic = SADB_X_DIAGNOSTIC_BAD_DST;
6752 		goto bail;
6753 	}
6754 	dst = (struct sockaddr_in6 *)(dstext + 1);
6755 	if (src->sin6_family != dst->sin6_family) {
6756 		err = EINVAL;
6757 		diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
6758 		goto bail;
6759 	}
6760 
6761 	/* Check for tunnel mode and act appropriately */
6762 	if (innsrcext != NULL) {
6763 		if (inndstext == NULL) {
6764 			err = EINVAL;
6765 			diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_DST;
6766 			goto bail;
6767 		}
6768 		if (sadb_addrcheck(NULL, (mblk_t *)samsg,
6769 		    (sadb_ext_t *)innsrcext, 0, ns) == KS_IN_ADDR_UNKNOWN) {
6770 			err = EINVAL;
6771 			diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_INNER_SRC;
6772 			goto bail;
6773 		}
6774 		isrc = (struct sockaddr_in6 *)(innsrcext + 1);
6775 		if (sadb_addrcheck(NULL, (mblk_t *)samsg,
6776 		    (sadb_ext_t *)inndstext, 0, ns) == KS_IN_ADDR_UNKNOWN) {
6777 			err = EINVAL;
6778 			diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_INNER_DST;
6779 			goto bail;
6780 		}
6781 		idst = (struct sockaddr_in6 *)(inndstext + 1);
6782 		if (isrc->sin6_family != idst->sin6_family) {
6783 			err = EINVAL;
6784 			diagnostic = SADB_X_DIAGNOSTIC_INNER_AF_MISMATCH;
6785 			goto bail;
6786 		}
6787 		if (isrc->sin6_family != AF_INET &&
6788 		    isrc->sin6_family != AF_INET6) {
6789 			err = EINVAL;
6790 			diagnostic = SADB_X_DIAGNOSTIC_BAD_INNER_SRC_AF;
6791 			goto bail;
6792 		}
6793 	} else if (inndstext != NULL) {
6794 		err = EINVAL;
6795 		diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_SRC;
6796 		goto bail;
6797 	}
6798 
6799 	/* Get selectors first, based on outer addresses */
6800 	err = ipsec_get_inverse_acquire_sel(&sel, srcext, dstext, &diagnostic);
6801 	if (err != 0)
6802 		goto bail;
6803 
6804 	/* Check for tunnel mode mismatches. */
6805 	if (innsrcext != NULL &&
6806 	    ((isrc->sin6_family == AF_INET &&
6807 	    sel.ips_protocol != IPPROTO_ENCAP && sel.ips_protocol != 0) ||
6808 	    (isrc->sin6_family == AF_INET6 &&
6809 	    sel.ips_protocol != IPPROTO_IPV6 && sel.ips_protocol != 0))) {
6810 		err = EPROTOTYPE;
6811 		goto bail;
6812 	}
6813 
6814 	/*
6815 	 * Okay, we have the addresses and other selector information.
6816 	 * Let's first find a conn...
6817 	 */
6818 	pp = NULL;
6819 	switch (sel.ips_protocol) {
6820 	case IPPROTO_TCP:
6821 		ipsec_tcp_pol(&sel, &pp, ipst);
6822 		break;
6823 	case IPPROTO_UDP:
6824 		ipsec_udp_pol(&sel, &pp, ipst);
6825 		break;
6826 	case IPPROTO_SCTP:
6827 		ipsec_sctp_pol(&sel, &pp, ipst);
6828 		break;
6829 	case IPPROTO_ENCAP:
6830 	case IPPROTO_IPV6:
6831 		/*
6832 		 * Assume sel.ips_remote_addr_* has the right address at
6833 		 * that exact position.
6834 		 */
6835 		itp = itp_get_byaddr((uint32_t *)(&sel.ips_local_addr_v6),
6836 		    (uint32_t *)(&sel.ips_remote_addr_v6), src->sin6_family,
6837 		    ipst);
6838 
6839 		if (innsrcext == NULL) {
6840 			/*
6841 			 * Transport-mode tunnel, make sure we fake out isel
6842 			 * to contain something based on the outer protocol.
6843 			 */
6844 			bzero(&isel, sizeof (isel));
6845 			isel.ips_isv4 = (sel.ips_protocol == IPPROTO_ENCAP);
6846 		} /* Else isel is initialized by ipsec_tun_pol(). */
6847 		err = ipsec_tun_pol(&isel, &pp, innsrcext, inndstext, itp,
6848 		    &diagnostic, ns);
6849 		/*
6850 		 * NOTE:  isel isn't used for now, but in RFC 430x IPsec, it
6851 		 * may be.
6852 		 */
6853 		if (err != 0)
6854 			goto bail;
6855 		break;
6856 	default:
6857 		ipsec_oth_pol(&sel, &pp, ipst);
6858 		break;
6859 	}
6860 
6861 	/*
6862 	 * If we didn't find a matching conn_t or other policy head, take a
6863 	 * look in the global policy.
6864 	 */
6865 	if (pp == NULL) {
6866 		pp = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, NULL, NULL, &sel,
6867 		    ns);
6868 		if (pp == NULL) {
6869 			/* There's no global policy. */
6870 			err = ENOENT;
6871 			diagnostic = 0;
6872 			goto bail;
6873 		}
6874 	}
6875 
6876 	/*
6877 	 * Now that we have a policy entry/widget, construct an ACQUIRE
6878 	 * message based on that, fix fields where appropriate,
6879 	 * and return the message.
6880 	 */
6881 	retmp = sadb_extended_acquire(&sel, pp, NULL,
6882 	    (itp != NULL && (itp->itp_flags & ITPF_P_TUNNEL)),
6883 	    samsg->sadb_msg_seq, samsg->sadb_msg_pid, ns);
6884 	if (pp != NULL) {
6885 		IPPOL_REFRELE(pp, ns);
6886 	}
6887 	if (itp != NULL) {
6888 		ITP_REFRELE(itp, ns);
6889 	}
6890 	if (retmp != NULL) {
6891 		return (retmp);
6892 	} else {
6893 		err = ENOMEM;
6894 		diagnostic = 0;
6895 	}
6896 bail:
6897 	samsg->sadb_msg_errno = (uint8_t)err;
6898 	samsg->sadb_x_msg_diagnostic = (uint16_t)diagnostic;
6899 	return (NULL);
6900 }
6901 
6902 /*
6903  * ipsa_lpkt is a one-element queue, only manipulated by the next two
6904  * functions.  They have to hold the ipsa_lock because of potential races
6905  * between key management using SADB_UPDATE, and inbound packets that may
6906  * queue up on the larval SA (hence the 'l' in "lpkt").
6907  */
6908 
6909 /*
6910  * sadb_set_lpkt: Return TRUE if we can swap in a value to ipsa->ipsa_lpkt and
6911  * freemsg the previous value.  Return FALSE if we lost the race and the SA is
6912  * in a non-LARVAL state.  free clue: ip_drop_packet(NULL) is safe.
6913  */
6914 boolean_t
6915 sadb_set_lpkt(ipsa_t *ipsa, mblk_t *npkt, netstack_t *ns)
6916 {
6917 	mblk_t *opkt;
6918 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
6919 	boolean_t is_larval;
6920 
6921 	/*
6922 	 * Check the packet's netstack id in case we go asynch with a
6923 	 * taskq_dispatch.
6924 	 */
6925 	ASSERT(((ipsec_in_t *)npkt->b_rptr)->ipsec_in_type == IPSEC_IN);
6926 	ASSERT(((ipsec_in_t *)npkt->b_rptr)->ipsec_in_stackid ==
6927 	    ns->netstack_stackid);
6928 
6929 	mutex_enter(&ipsa->ipsa_lock);
6930 	is_larval = (ipsa->ipsa_state == IPSA_STATE_LARVAL);
6931 	if (is_larval) {
6932 		opkt = ipsa->ipsa_lpkt;
6933 		ipsa->ipsa_lpkt = npkt;
6934 	} else {
6935 		/* We lost the race. */
6936 		opkt = NULL;
6937 		ASSERT(ipsa->ipsa_lpkt == NULL);
6938 	}
6939 	mutex_exit(&ipsa->ipsa_lock);
6940 
6941 	ip_drop_packet(opkt, B_TRUE, NULL, NULL,
6942 	    DROPPER(ipss, ipds_sadb_inlarval_replace),
6943 	    &ipss->ipsec_sadb_dropper);
6944 	return (is_larval);
6945 }
6946 
6947 /*
6948  * sadb_clear_lpkt: Atomically clear ipsa->ipsa_lpkt and return the
6949  * previous value.
6950  */
6951 mblk_t *
6952 sadb_clear_lpkt(ipsa_t *ipsa)
6953 {
6954 	mblk_t *opkt;
6955 
6956 	mutex_enter(&ipsa->ipsa_lock);
6957 	opkt = ipsa->ipsa_lpkt;
6958 	ipsa->ipsa_lpkt = NULL;
6959 	mutex_exit(&ipsa->ipsa_lock);
6960 
6961 	return (opkt);
6962 }
6963 
6964 /*
6965  * Buffer a packet that's in IDLE state as set by Solaris Clustering.
6966  */
6967 void
6968 sadb_buf_pkt(ipsa_t *ipsa, mblk_t *bpkt, netstack_t *ns)
6969 {
6970 	ipsec_stack_t   *ipss = ns->netstack_ipsec;
6971 	extern void (*cl_inet_idlesa)(netstackid_t, uint8_t, uint32_t,
6972 	    sa_family_t, in6_addr_t, in6_addr_t, void *);
6973 	in6_addr_t *srcaddr = (in6_addr_t *)(&ipsa->ipsa_srcaddr);
6974 	in6_addr_t *dstaddr = (in6_addr_t *)(&ipsa->ipsa_dstaddr);
6975 
6976 	ASSERT(ipsa->ipsa_state == IPSA_STATE_IDLE);
6977 
6978 	if (cl_inet_idlesa == NULL) {
6979 		ip_drop_packet(bpkt, B_TRUE, NULL, NULL,
6980 		    DROPPER(ipss, ipds_sadb_inidle_overflow),
6981 		    &ipss->ipsec_sadb_dropper);
6982 		return;
6983 	}
6984 
6985 	cl_inet_idlesa(ns->netstack_stackid,
6986 	    (ipsa->ipsa_type == SADB_SATYPE_AH) ? IPPROTO_AH : IPPROTO_ESP,
6987 	    ipsa->ipsa_spi, ipsa->ipsa_addrfam, *srcaddr, *dstaddr, NULL);
6988 
6989 	/*
6990 	 * Check the packet's netstack id in case we go asynch with a
6991 	 * taskq_dispatch.
6992 	 */
6993 	ASSERT(((ipsec_in_t *)bpkt->b_rptr)->ipsec_in_type == IPSEC_IN);
6994 	ASSERT(((ipsec_in_t *)bpkt->b_rptr)->ipsec_in_stackid ==
6995 	    ns->netstack_stackid);
6996 
6997 	mutex_enter(&ipsa->ipsa_lock);
6998 	ipsa->ipsa_mblkcnt++;
6999 	if (ipsa->ipsa_bpkt_head == NULL) {
7000 		ipsa->ipsa_bpkt_head = ipsa->ipsa_bpkt_tail = bpkt;
7001 	} else {
7002 		ipsa->ipsa_bpkt_tail->b_next = bpkt;
7003 		ipsa->ipsa_bpkt_tail = bpkt;
7004 		if (ipsa->ipsa_mblkcnt > SADB_MAX_IDLEPKTS) {
7005 			mblk_t *tmp;
7006 			tmp = ipsa->ipsa_bpkt_head;
7007 			ipsa->ipsa_bpkt_head = ipsa->ipsa_bpkt_head->b_next;
7008 			ip_drop_packet(tmp, B_TRUE, NULL, NULL,
7009 			    DROPPER(ipss, ipds_sadb_inidle_overflow),
7010 			    &ipss->ipsec_sadb_dropper);
7011 			ipsa->ipsa_mblkcnt --;
7012 		}
7013 	}
7014 	mutex_exit(&ipsa->ipsa_lock);
7015 
7016 }
7017 
7018 /*
7019  * Stub function that taskq_dispatch() invokes to take the mblk (in arg)
7020  * and put into STREAMS again.
7021  */
7022 void
7023 sadb_clear_buf_pkt(void *ipkt)
7024 {
7025 	mblk_t	*tmp, *buf_pkt;
7026 	netstack_t *ns;
7027 	ipsec_in_t *ii;
7028 
7029 	buf_pkt = (mblk_t *)ipkt;
7030 
7031 	ii = (ipsec_in_t *)buf_pkt->b_rptr;
7032 	ASSERT(ii->ipsec_in_type == IPSEC_IN);
7033 	ns = netstack_find_by_stackid(ii->ipsec_in_stackid);
7034 	if (ns != NULL && ns != ii->ipsec_in_ns) {
7035 		netstack_rele(ns);
7036 		ns = NULL;  /* For while-loop below. */
7037 	}
7038 
7039 	while (buf_pkt != NULL) {
7040 		tmp = buf_pkt->b_next;
7041 		buf_pkt->b_next = NULL;
7042 		if (ns != NULL)
7043 			ip_fanout_proto_again(buf_pkt, NULL, NULL, NULL);
7044 		else
7045 			freemsg(buf_pkt);
7046 		buf_pkt = tmp;
7047 	}
7048 	if (ns != NULL)
7049 		netstack_rele(ns);
7050 }
/*
 * State passed as the walker cookie from sadb_alg_update() to
 * sadb_alg_update_cb(), the walker callback that frees/creates crypto
 * context templates when a crypto software provider is removed or
 * added.
 */

struct sadb_update_alg_state {
	/* Kind of algorithm that changed (IPSEC_ALG_AUTH or IPSEC_ALG_ENCR). */
	ipsec_algtype_t alg_type;
	/* Algorithm id, compared against each SA's auth/encr algorithm. */
	uint8_t alg_id;
	/* B_TRUE if the provider was added, B_FALSE if it was removed. */
	boolean_t is_added;
};
7062 
7063 static void
7064 sadb_alg_update_cb(isaf_t *head, ipsa_t *entry, void *cookie)
7065 {
7066 	struct sadb_update_alg_state *update_state =
7067 	    (struct sadb_update_alg_state *)cookie;
7068 	crypto_ctx_template_t *ctx_tmpl = NULL;
7069 
7070 	ASSERT(MUTEX_HELD(&head->isaf_lock));
7071 
7072 	if (entry->ipsa_state == IPSA_STATE_LARVAL)
7073 		return;
7074 
7075 	mutex_enter(&entry->ipsa_lock);
7076 
7077 	switch (update_state->alg_type) {
7078 	case IPSEC_ALG_AUTH:
7079 		if (entry->ipsa_auth_alg == update_state->alg_id)
7080 			ctx_tmpl = &entry->ipsa_authtmpl;
7081 		break;
7082 	case IPSEC_ALG_ENCR:
7083 		if (entry->ipsa_encr_alg == update_state->alg_id)
7084 			ctx_tmpl = &entry->ipsa_encrtmpl;
7085 		break;
7086 	default:
7087 		ctx_tmpl = NULL;
7088 	}
7089 
7090 	if (ctx_tmpl == NULL) {
7091 		mutex_exit(&entry->ipsa_lock);
7092 		return;
7093 	}
7094 
7095 	/*
7096 	 * The context template of the SA may be affected by the change
7097 	 * of crypto provider.
7098 	 */
7099 	if (update_state->is_added) {
7100 		/* create the context template if not already done */
7101 		if (*ctx_tmpl == NULL) {
7102 			(void) ipsec_create_ctx_tmpl(entry,
7103 			    update_state->alg_type);
7104 		}
7105 	} else {
7106 		/*
7107 		 * The crypto provider was removed. If the context template
7108 		 * exists but it is no longer valid, free it.
7109 		 */
7110 		if (*ctx_tmpl != NULL)
7111 			ipsec_destroy_ctx_tmpl(entry, update_state->alg_type);
7112 	}
7113 
7114 	mutex_exit(&entry->ipsa_lock);
7115 }
7116 
7117 /*
7118  * Invoked by IP when an software crypto provider has been updated.
7119  * The type and id of the corresponding algorithm is passed as argument.
7120  * is_added is B_TRUE if the provider was added, B_FALSE if it was
7121  * removed. The function updates the SADB and free/creates the
7122  * context templates associated with SAs if needed.
7123  */
7124 
7125 #define	SADB_ALG_UPDATE_WALK(sadb, table) \
7126     sadb_walker((sadb).table, (sadb).sdb_hashsize, sadb_alg_update_cb, \
7127 	&update_state)
7128 
7129 void
7130 sadb_alg_update(ipsec_algtype_t alg_type, uint8_t alg_id, boolean_t is_added,
7131     netstack_t *ns)
7132 {
7133 	struct sadb_update_alg_state update_state;
7134 	ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
7135 	ipsecesp_stack_t	*espstack = ns->netstack_ipsecesp;
7136 
7137 	update_state.alg_type = alg_type;
7138 	update_state.alg_id = alg_id;
7139 	update_state.is_added = is_added;
7140 
7141 	if (alg_type == IPSEC_ALG_AUTH) {
7142 		/* walk the AH tables only for auth. algorithm changes */
7143 		SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v4, sdb_of);
7144 		SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v4, sdb_if);
7145 		SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v6, sdb_of);
7146 		SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v6, sdb_if);
7147 	}
7148 
7149 	/* walk the ESP tables */
7150 	SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v4, sdb_of);
7151 	SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v4, sdb_if);
7152 	SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v6, sdb_of);
7153 	SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v6, sdb_if);
7154 }
7155 
7156 /*
7157  * Creates a context template for the specified SA. This function
7158  * is called when an SA is created and when a context template needs
7159  * to be created due to a change of software provider.
7160  */
7161 int
7162 ipsec_create_ctx_tmpl(ipsa_t *sa, ipsec_algtype_t alg_type)
7163 {
7164 	ipsec_alginfo_t *alg;
7165 	crypto_mechanism_t mech;
7166 	crypto_key_t *key;
7167 	crypto_ctx_template_t *sa_tmpl;
7168 	int rv;
7169 	ipsec_stack_t	*ipss = sa->ipsa_netstack->netstack_ipsec;
7170 
7171 	ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock));
7172 	ASSERT(MUTEX_HELD(&sa->ipsa_lock));
7173 
7174 	/* get pointers to the algorithm info, context template, and key */
7175 	switch (alg_type) {
7176 	case IPSEC_ALG_AUTH:
7177 		key = &sa->ipsa_kcfauthkey;
7178 		sa_tmpl = &sa->ipsa_authtmpl;
7179 		alg = ipss->ipsec_alglists[alg_type][sa->ipsa_auth_alg];
7180 		break;
7181 	case IPSEC_ALG_ENCR:
7182 		key = &sa->ipsa_kcfencrkey;
7183 		sa_tmpl = &sa->ipsa_encrtmpl;
7184 		alg = ipss->ipsec_alglists[alg_type][sa->ipsa_encr_alg];
7185 		break;
7186 	default:
7187 		alg = NULL;
7188 	}
7189 
7190 	if (alg == NULL || !ALG_VALID(alg))
7191 		return (EINVAL);
7192 
7193 	/* initialize the mech info structure for the framework */
7194 	ASSERT(alg->alg_mech_type != CRYPTO_MECHANISM_INVALID);
7195 	mech.cm_type = alg->alg_mech_type;
7196 	mech.cm_param = NULL;
7197 	mech.cm_param_len = 0;
7198 
7199 	/* create a new context template */
7200 	rv = crypto_create_ctx_template(&mech, key, sa_tmpl, KM_NOSLEEP);
7201 
7202 	/*
7203 	 * CRYPTO_MECH_NOT_SUPPORTED can be returned if only hardware
7204 	 * providers are available for that mechanism. In that case
7205 	 * we don't fail, and will generate the context template from
7206 	 * the framework callback when a software provider for that
7207 	 * mechanism registers.
7208 	 *
7209 	 * The context template is assigned the special value
7210 	 * IPSEC_CTX_TMPL_ALLOC if the allocation failed due to a
7211 	 * lack of memory. No attempt will be made to use
7212 	 * the context template if it is set to this value.
7213 	 */
7214 	if (rv == CRYPTO_HOST_MEMORY) {
7215 		*sa_tmpl = IPSEC_CTX_TMPL_ALLOC;
7216 	} else if (rv != CRYPTO_SUCCESS) {
7217 		*sa_tmpl = NULL;
7218 		if (rv != CRYPTO_MECH_NOT_SUPPORTED)
7219 			return (EINVAL);
7220 	}
7221 
7222 	return (0);
7223 }
7224 
7225 /*
7226  * Destroy the context template of the specified algorithm type
7227  * of the specified SA. Must be called while holding the SA lock.
7228  */
7229 void
7230 ipsec_destroy_ctx_tmpl(ipsa_t *sa, ipsec_algtype_t alg_type)
7231 {
7232 	ASSERT(MUTEX_HELD(&sa->ipsa_lock));
7233 
7234 	if (alg_type == IPSEC_ALG_AUTH) {
7235 		if (sa->ipsa_authtmpl == IPSEC_CTX_TMPL_ALLOC)
7236 			sa->ipsa_authtmpl = NULL;
7237 		else if (sa->ipsa_authtmpl != NULL) {
7238 			crypto_destroy_ctx_template(sa->ipsa_authtmpl);
7239 			sa->ipsa_authtmpl = NULL;
7240 		}
7241 	} else {
7242 		ASSERT(alg_type == IPSEC_ALG_ENCR);
7243 		if (sa->ipsa_encrtmpl == IPSEC_CTX_TMPL_ALLOC)
7244 			sa->ipsa_encrtmpl = NULL;
7245 		else if (sa->ipsa_encrtmpl != NULL) {
7246 			crypto_destroy_ctx_template(sa->ipsa_encrtmpl);
7247 			sa->ipsa_encrtmpl = NULL;
7248 		}
7249 	}
7250 }
7251 
7252 /*
7253  * Use the kernel crypto framework to check the validity of a key received
7254  * via keysock. Returns 0 if the key is OK, -1 otherwise.
7255  */
7256 int
7257 ipsec_check_key(crypto_mech_type_t mech_type, sadb_key_t *sadb_key,
7258     boolean_t is_auth, int *diag)
7259 {
7260 	crypto_mechanism_t mech;
7261 	crypto_key_t crypto_key;
7262 	int crypto_rc;
7263 
7264 	mech.cm_type = mech_type;
7265 	mech.cm_param = NULL;
7266 	mech.cm_param_len = 0;
7267 
7268 	crypto_key.ck_format = CRYPTO_KEY_RAW;
7269 	crypto_key.ck_data = sadb_key + 1;
7270 	crypto_key.ck_length = sadb_key->sadb_key_bits;
7271 
7272 	crypto_rc = crypto_key_check(&mech, &crypto_key);
7273 
7274 	switch (crypto_rc) {
7275 	case CRYPTO_SUCCESS:
7276 		return (0);
7277 	case CRYPTO_MECHANISM_INVALID:
7278 	case CRYPTO_MECH_NOT_SUPPORTED:
7279 		*diag = is_auth ? SADB_X_DIAGNOSTIC_BAD_AALG :
7280 		    SADB_X_DIAGNOSTIC_BAD_EALG;
7281 		break;
7282 	case CRYPTO_KEY_SIZE_RANGE:
7283 		*diag = is_auth ? SADB_X_DIAGNOSTIC_BAD_AKEYBITS :
7284 		    SADB_X_DIAGNOSTIC_BAD_EKEYBITS;
7285 		break;
7286 	case CRYPTO_WEAK_KEY:
7287 		*diag = is_auth ? SADB_X_DIAGNOSTIC_WEAK_AKEY :
7288 		    SADB_X_DIAGNOSTIC_WEAK_EKEY;
7289 		break;
7290 	}
7291 
7292 	return (-1);
7293 }
7294 /*
7295  * If this is an outgoing SA then add some fuzz to the
7296  * SOFT EXPIRE time. The reason for this is to stop
7297  * peers trying to renegotiate SOFT expiring SA's at
7298  * the same time. The amount of fuzz needs to be at
7299  * least 8 seconds which is the typical interval
7300  * sadb_ager(), although this is only a guide as it
7301  * selftunes.
7302  */
7303 void
7304 lifetime_fuzz(ipsa_t *assoc)
7305 {
7306 	uint8_t rnd;
7307 
7308 	if (assoc->ipsa_softaddlt == 0)
7309 		return;
7310 
7311 	(void) random_get_pseudo_bytes(&rnd, sizeof (rnd));
7312 	rnd = (rnd & 0xF) + 8;
7313 	assoc->ipsa_softexpiretime -= rnd;
7314 	assoc->ipsa_softaddlt -= rnd;
7315 }
7316 void
7317 destroy_ipsa_pair(ipsap_t *ipsapp)
7318 {
7319 	if (ipsapp == NULL)
7320 		return;
7321 
7322 	/*
7323 	 * Because of the multi-line macro nature of IPSA_REFRELE, keep
7324 	 * them in { }.
7325 	 */
7326 	if (ipsapp->ipsap_sa_ptr != NULL) {
7327 		IPSA_REFRELE(ipsapp->ipsap_sa_ptr);
7328 	}
7329 	if (ipsapp->ipsap_psa_ptr != NULL) {
7330 		IPSA_REFRELE(ipsapp->ipsap_psa_ptr);
7331 	}
7332 
7333 	kmem_free(ipsapp, sizeof (*ipsapp));
7334 }
7335 
7336 /*
7337  * The sadb_ager() function walks through the hash tables of SA's and ages
7338  * them, if the SA expires as a result, its marked as DEAD and will be reaped
7339  * the next time sadb_ager() runs. SA's which are paired or have a peer (same
7340  * SA appears in both the inbound and outbound tables because its not possible
7341  * to determine its direction) are placed on a list when they expire. This is
7342  * to ensure that pair/peer SA's are reaped at the same time, even if they
7343  * expire at different times.
7344  *
7345  * This function is called twice by sadb_ager(), one after processing the
7346  * inbound table, then again after processing the outbound table.
7347  */
7348 void
7349 age_pair_peer_list(templist_t *haspeerlist, sadb_t *sp, boolean_t outbound)
7350 {
7351 	templist_t *listptr;
7352 	int outhash;
7353 	isaf_t *bucket;
7354 	boolean_t haspeer;
7355 	ipsa_t *peer_assoc, *dying;
7356 	/*
7357 	 * Haspeer cases will contain both IPv4 and IPv6.  This code
7358 	 * is address independent.
7359 	 */
7360 	while (haspeerlist != NULL) {
7361 		/* "dying" contains the SA that has a peer. */
7362 		dying = haspeerlist->ipsa;
7363 		haspeer = (dying->ipsa_haspeer);
7364 		listptr = haspeerlist;
7365 		haspeerlist = listptr->next;
7366 		kmem_free(listptr, sizeof (*listptr));
7367 		/*
7368 		 * Pick peer bucket based on addrfam.
7369 		 */
7370 		if (outbound) {
7371 			if (haspeer)
7372 				bucket = INBOUND_BUCKET(sp, dying->ipsa_spi);
7373 			else
7374 				bucket = INBOUND_BUCKET(sp,
7375 				    dying->ipsa_otherspi);
7376 		} else { /* inbound */
7377 			if (haspeer) {
7378 				if (dying->ipsa_addrfam == AF_INET6) {
7379 					outhash = OUTBOUND_HASH_V6(sp,
7380 					    *((in6_addr_t *)&dying->
7381 					    ipsa_dstaddr));
7382 				} else {
7383 					outhash = OUTBOUND_HASH_V4(sp,
7384 					    *((ipaddr_t *)&dying->
7385 					    ipsa_dstaddr));
7386 				}
7387 			} else if (dying->ipsa_addrfam == AF_INET6) {
7388 				outhash = OUTBOUND_HASH_V6(sp,
7389 				    *((in6_addr_t *)&dying->
7390 				    ipsa_srcaddr));
7391 			} else {
7392 				outhash = OUTBOUND_HASH_V4(sp,
7393 				    *((ipaddr_t *)&dying->
7394 				    ipsa_srcaddr));
7395 			}
7396 		bucket = &(sp->sdb_of[outhash]);
7397 		}
7398 
7399 		mutex_enter(&bucket->isaf_lock);
7400 		/*
7401 		 * "haspeer" SA's have the same src/dst address ordering,
7402 		 * "paired" SA's have the src/dst addresses reversed.
7403 		 */
7404 		if (haspeer) {
7405 			peer_assoc = ipsec_getassocbyspi(bucket,
7406 			    dying->ipsa_spi, dying->ipsa_srcaddr,
7407 			    dying->ipsa_dstaddr, dying->ipsa_addrfam);
7408 		} else {
7409 			peer_assoc = ipsec_getassocbyspi(bucket,
7410 			    dying->ipsa_otherspi, dying->ipsa_dstaddr,
7411 			    dying->ipsa_srcaddr, dying->ipsa_addrfam);
7412 		}
7413 
7414 		mutex_exit(&bucket->isaf_lock);
7415 		if (peer_assoc != NULL) {
7416 			mutex_enter(&peer_assoc->ipsa_lock);
7417 			mutex_enter(&dying->ipsa_lock);
7418 			if (!haspeer) {
7419 				/*
7420 				 * Only SA's which have a "peer" or are
7421 				 * "paired" end up on this list, so this
7422 				 * must be a "paired" SA, update the flags
7423 				 * to break the pair.
7424 				 */
7425 				peer_assoc->ipsa_otherspi = 0;
7426 				peer_assoc->ipsa_flags &= ~IPSA_F_PAIRED;
7427 				dying->ipsa_otherspi = 0;
7428 				dying->ipsa_flags &= ~IPSA_F_PAIRED;
7429 			}
7430 			if (haspeer || outbound) {
7431 				/*
7432 				 * Update the state of the "inbound" SA when
7433 				 * the "outbound" SA has expired. Don't update
7434 				 * the "outbound" SA when the "inbound" SA
7435 				 * SA expires because setting the hard_addtime
7436 				 * below will cause this to happen.
7437 				 */
7438 				peer_assoc->ipsa_state = dying->ipsa_state;
7439 			}
7440 			if (dying->ipsa_state == IPSA_STATE_DEAD)
7441 				peer_assoc->ipsa_hardexpiretime = 1;
7442 
7443 			mutex_exit(&dying->ipsa_lock);
7444 			mutex_exit(&peer_assoc->ipsa_lock);
7445 			IPSA_REFRELE(peer_assoc);
7446 		}
7447 		IPSA_REFRELE(dying);
7448 	}
7449 }
7450